Merge pull request #119 from snshn/data-url-input

Data URL input
This commit is contained in:
Sunshine 2020-02-23 23:33:25 -05:00 committed by GitHub
commit 00942e0b1d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 465 additions and 89 deletions

View file

@ -52,11 +52,11 @@ environment:
# Nightly 64-bit MSVC
- channel: nightly
target: x86_64-pc-windows-msvc
#cargoflags: --features "unstable"
cargoflags: --features "unstable"
# Nightly 32-bit MSVC
- channel: nightly
target: i686-pc-windows-msvc
#cargoflags: --features "unstable"
cargoflags: --features "unstable"
### GNU Toolchains ###
@ -80,12 +80,12 @@ environment:
- channel: nightly
target: x86_64-pc-windows-gnu
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
#cargoflags: --features "unstable"
cargoflags: --features "unstable"
# Nightly 32-bit GNU
- channel: nightly
target: i686-pc-windows-gnu
MINGW_PATH: 'C:\MinGW\bin'
#cargoflags: --features "unstable"
cargoflags: --features "unstable"
### Allowed failures ###
@ -124,7 +124,8 @@ install:
build: false
# Uses 'cargo test' to run tests and build. Alternatively, the project may call compiled programs
#directly or perform other testing commands. Rust will automatically be placed in the PATH
# directly or perform other testing commands. Rust will automatically be placed in the PATH
# environment variable.
test_script:
- cargo test --verbose %cargoflags%
- cargo build --all --locked --verbose %cargoflags%
- cargo test --all --locked --verbose %cargoflags%

73
Cargo.lock generated
View file

@ -26,6 +26,18 @@ name = "anyhow"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "assert_cmd"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"escargot 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-tree 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "async-compression"
version = "0.2.0"
@ -134,6 +146,16 @@ dependencies = [
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "difference"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "doc-comment"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "dtoa"
version = "0.4.4"
@ -147,6 +169,17 @@ dependencies = [
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "escargot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "flate2"
version = "1.0.13"
@ -512,6 +545,7 @@ dependencies = [
name = "monolith"
version = "2.1.2"
dependencies = [
"assert_cmd 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
"base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -683,6 +717,29 @@ name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "predicates"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "predicates-core"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "predicates-tree"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "proc-macro2"
version = "1.0.6"
@ -941,6 +998,9 @@ dependencies = [
name = "serde"
version = "1.0.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "serde_derive"
@ -1131,6 +1191,11 @@ name = "tower-service"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "treeline"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "try-lock"
version = "0.2.2"
@ -1372,6 +1437,7 @@ dependencies = [
"checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum anyhow 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c"
"checksum assert_cmd 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6283bac8dd7226470d491bc4737816fea4ca1fba7a2847f2e9097fd6bfb4624c"
"checksum async-compression 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2c5c52622726d68ec35fec88edfb4ccb862d4f3b3bfa4af2f45142e69ef9b220"
"checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90"
"checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2"
@ -1387,8 +1453,11 @@ dependencies = [
"checksum core-foundation 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "25b9e03f145fd4f2bf705e07b900cd41fc636598fe5dc452fd0db1441c3f496d"
"checksum core-foundation-sys 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e7ca8a5221364ef15ce201e8ed2f609fc312682a8f4e0e3d4aa5879764e0fa3b"
"checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1"
"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
"checksum doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97"
"checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e"
"checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = "87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9"
"checksum escargot 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "74cf96bec282dcdb07099f7e31d9fed323bca9435a09aba7b6d99b7617bca96d"
"checksum flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6bd6d6f4752952feb71363cffc9ebac9411b75b87c6ab6058c40c8900cf43c0f"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
@ -1451,6 +1520,9 @@ dependencies = [
"checksum pkg-config 0.3.17 (registry+https://github.com/rust-lang/crates.io-index)" = "05da548ad6865900e60eaba7f589cc0783590a92e940c26953ff81ddbab2d677"
"checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"
"checksum precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
"checksum predicates 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a9bfe52247e5cc9b2f943682a85a5549fb9662245caf094504e69a2f03fe64d4"
"checksum predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "06075c3a3e92559ff8929e7a280684489ea27fe44805174c3ebd9328dcb37178"
"checksum predicates-tree 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8e63c4859013b38a76eca2414c64911fba30def9e3202ac461a2d22831220124"
"checksum proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9c9e470a8dc4aeae2dee2f335e8f533e2d4b347e1434e5671afc49b054592f27"
"checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe"
"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
@ -1499,6 +1571,7 @@ dependencies = [
"checksum tokio-tls 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bde02a3a5291395f59b06ec6945a3077602fac2b07eeeaf0dee2122f3619828"
"checksum tokio-util 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "571da51182ec208780505a32528fc5512a8fe1443ab960b3f2f3ef093cd16930"
"checksum tower-service 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e987b6bf443f4b5b3b6f38704195592cca41c5bb7aedd3c3693c7081f8289860"
"checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
"checksum try-lock 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e604eb7b43c06650e854be16a2a03155743d3752dd1c943f6829e26b7a36e382"
"checksum unicase 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"

View file

@ -23,3 +23,6 @@ url = "2.1.1"
version = "0.10.*"
default-features = false
features = ["default-tls", "blocking", "gzip"]
[dev-dependencies]
assert_cmd = "0.12.0"

View file

@ -1,16 +1,21 @@
.PHONY: all build install run test lint
#!/usr/bin/make -f
all: test build
all: test
.PHONY: all
build:
@cargo build --locked
.PHONY: build
install:
@cargo install --force --locked --path .
.PHONY: install
test:
test: build
@cargo test --locked
@cargo fmt --all -- --check
.PHONY: test
lint:
@cargo fmt --all --
.PHONY: lint

View file

@ -21,7 +21,7 @@ const DEFAULT_USER_AGENT: &str =
impl AppArgs {
pub fn get() -> AppArgs {
let app = App::new("monolith")
let app = App::new(env!("CARGO_PKG_NAME"))
.version(crate_version!())
.author(crate_authors!("\n"))
.about(crate_description!())

View file

@ -1,7 +1,7 @@
use crate::http::retrieve_asset;
use crate::js::attr_is_event_handler;
use crate::utils::{
data_to_dataurl, is_valid_url, resolve_css_imports, resolve_url, url_has_protocol,
data_to_data_url, is_http_url, resolve_css_imports, resolve_url, url_has_protocol,
};
use html5ever::interface::QualName;
use html5ever::parse_document;
@ -130,7 +130,7 @@ pub fn walk_and_embed_assets(
} else {
let href_full_url = resolve_url(&url, attr.value.as_ref())
.unwrap_or_default();
let (favicon_dataurl, _) = retrieve_asset(
let (favicon_data_url, _) = retrieve_asset(
cache,
client,
&href_full_url,
@ -140,7 +140,7 @@ pub fn walk_and_embed_assets(
)
.unwrap_or_default();
attr.value.clear();
attr.value.push_slice(favicon_dataurl.as_str());
attr.value.push_slice(favicon_data_url.as_str());
}
}
}
@ -229,14 +229,14 @@ pub fn walk_and_embed_assets(
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(TRANSPARENT_PIXEL),
});
} else if let Some((dataurl, _)) = found_datasrc
} else if let Some((data_url, _)) = found_datasrc
.iter()
.chain(&found_src) // Give dataurl priority
.chain(&found_src) // Give data_url priority
.map(|attr| attr.value.trim())
.filter(|src| !src.is_empty()) // Ignore empty srcs
.next()
.and_then(|src| resolve_url(&url, src).ok()) // Make absolute
.and_then(|abs_src| // Download and convert to dataurl
.and_then(|abs_src| // Download and convert to data_url
retrieve_asset(
cache,
client,
@ -246,10 +246,10 @@ pub fn walk_and_embed_assets(
opt_silent,
).ok())
{
// Add the new dataurl src attribute
// Add the new data_url src attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(dataurl.as_ref()),
value: Tendril::from_slice(data_url.as_ref()),
});
}
}
@ -270,7 +270,7 @@ pub fn walk_and_embed_assets(
} else {
let srcset_full_url =
resolve_url(&url, attr.value.trim()).unwrap_or_default();
let (source_dataurl, _) = retrieve_asset(
let (source_data_url, _) = retrieve_asset(
cache,
client,
&srcset_full_url,
@ -280,7 +280,7 @@ pub fn walk_and_embed_assets(
)
.unwrap_or((str!(), str!()));
attr.value.clear();
attr.value.push_slice(source_dataurl.as_str());
attr.value.push_slice(source_data_url.as_str());
}
}
}
@ -334,7 +334,7 @@ pub fn walk_and_embed_assets(
if &attr.name.local == "src" {
let src_full_url =
resolve_url(&url, attr.value.trim()).unwrap_or_default();
let (js_dataurl, _) = retrieve_asset(
let (js_data_url, _) = retrieve_asset(
cache,
client,
&src_full_url,
@ -344,7 +344,7 @@ pub fn walk_and_embed_assets(
)
.unwrap_or((str!(), str!()));
attr.value.clear();
attr.value.push_slice(js_dataurl.as_str());
attr.value.push_slice(js_data_url.as_str());
}
}
}
@ -377,7 +377,7 @@ pub fn walk_and_embed_assets(
if &attr.name.local == "action" {
let attr_value = attr.value.trim();
// Modify action to be a full URL
if !is_valid_url(attr_value) {
if !is_http_url(attr_value) {
let href_full_url =
resolve_url(&url, attr_value).unwrap_or_default();
attr.value.clear();
@ -426,9 +426,9 @@ pub fn walk_and_embed_assets(
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let iframe_dataurl = data_to_dataurl("text/html", &buf);
let iframe_data_url = data_to_data_url("text/html", &buf);
attr.value.clear();
attr.value.push_slice(iframe_dataurl.as_str());
attr.value.push_slice(iframe_data_url.as_str());
}
}
}
@ -447,7 +447,7 @@ pub fn walk_and_embed_assets(
} else {
let poster_full_url =
resolve_url(&url, video_poster).unwrap_or_default();
let (poster_dataurl, _) = retrieve_asset(
let (poster_data_url, _) = retrieve_asset(
cache,
client,
&poster_full_url,
@ -457,7 +457,7 @@ pub fn walk_and_embed_assets(
)
.unwrap_or((poster_full_url, str!()));
attr.value.clear();
attr.value.push_slice(poster_dataurl.as_str());
attr.value.push_slice(poster_data_url.as_str());
}
}
}

View file

@ -1,4 +1,4 @@
use crate::utils::{clean_url, data_to_dataurl, is_data_url};
use crate::utils::{clean_url, data_to_data_url, is_data_url};
use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE;
use std::collections::HashMap;
@ -7,13 +7,13 @@ pub fn retrieve_asset(
cache: &mut HashMap<String, String>,
client: &Client,
url: &str,
as_dataurl: bool,
as_data_url: bool,
mime: &str,
opt_silent: bool,
) -> Result<(String, String), reqwest::Error> {
let cache_key = clean_url(&url);
if is_data_url(&url).unwrap() {
if is_data_url(&url) {
Ok((url.to_string(), url.to_string()))
} else {
if cache.contains_key(&cache_key) {
@ -38,7 +38,7 @@ pub fn retrieve_asset(
let new_cache_key = clean_url(&res_url);
if as_dataurl {
if as_data_url {
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
@ -53,10 +53,10 @@ pub fn retrieve_asset(
} else {
mime
};
let dataurl = data_to_dataurl(&mimetype, &data);
let data_url = data_to_data_url(&mimetype, &data);
// insert in cache
cache.insert(new_cache_key, dataurl.clone());
Ok((dataurl, res_url))
cache.insert(new_cache_key, data_url.clone());
Ok((data_url, res_url))
} else {
let content = response.text().unwrap();
// insert in cache

View file

@ -7,7 +7,7 @@ mod macros;
use crate::args::AppArgs;
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::http::retrieve_asset;
use monolith::utils::is_valid_url;
use monolith::utils::{data_url_to_text, is_data_url, is_http_url};
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
@ -46,11 +46,14 @@ impl Output {
fn main() {
let app_args = AppArgs::get();
let target_url: &str = app_args.url_target.as_str();
let base_url;
let dom;
if !is_valid_url(app_args.url_target.as_str()) {
if !is_http_url(target_url) && !is_data_url(target_url) {
eprintln!(
"Only HTTP and HTTPS URLs are allowed but got: {}",
&app_args.url_target
"Only HTTP(S) or data URLs are supported but got: {}",
&target_url
);
process::exit(1);
}
@ -78,21 +81,30 @@ fn main() {
.expect("Failed to initialize HTTP client");
// Retrieve root document
let (data, final_url) = retrieve_asset(
&mut cache,
&client,
app_args.url_target.as_str(),
false,
"",
app_args.silent,
)
if is_http_url(target_url) {
let (data, final_url) =
retrieve_asset(&mut cache, &client, target_url, false, "", app_args.silent)
.expect("Could not retrieve assets in HTML");
let dom = html_to_dom(&data);
base_url = final_url;
dom = html_to_dom(&data);
} else if is_data_url(target_url) {
let text: String = data_url_to_text(target_url);
if text.len() == 0 {
eprintln!("Unsupported data URL input");
process::exit(1);
}
base_url = str!();
dom = html_to_dom(&text);
} else {
process::exit(1);
}
walk_and_embed_assets(
&mut cache,
&client,
&final_url,
&base_url,
&dom.document,
app_args.no_css,
app_args.no_js,

182
src/tests/cli.rs Normal file
View file

@ -0,0 +1,182 @@
use assert_cmd::prelude::*;
use std::process::Command;
#[test]
fn print_version() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd.arg("-V").output().unwrap();
// STDOUT should contain program name and version
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn bad_input() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd.arg("kernel.org").output().unwrap();
// STDOUT should be empty
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
// STDERR should contain error description
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
"Only HTTP(S) or data URLs are supported but got: kernel.org\n"
);
// The exit code should be 1
out.assert().code(1);
Ok(())
}
#[test]
fn bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
// STDOUT should contain HTML
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
// STDERR should contain error description
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
"Unsupported data URL input\n"
);
// The exit code should be 1
out.assert().code(1);
Ok(())
}
#[test]
fn isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-I")
.arg("data:text/html,Hello%2C%20World!")
.output()
.unwrap();
// STDOUT should contain isolated HTML
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta></head><body>Hello, World!</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-c")
.arg("data:text/html,<style>body{background-color:pink}</style>Hello")
.output()
.unwrap();
// STDOUT should contain HTML with no CSS
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta><style></style></head><body>Hello</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-f")
.arg("data:text/html,<iframe src=\"https://google.com\"></iframe>Hi")
.output()
.unwrap();
// STDOUT should contain HTML with no iframes
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta></head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-i")
.arg("data:text/html,<img src=\"https://google.com\"/>Hi")
.output()
.unwrap();
// STDOUT should contain HTML with no images
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta></head><body><img src=\"\">Hi</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-j")
.arg("data:text/html,<script>alert(2)</script>Hi")
.output()
.unwrap();
// STDOUT should contain HTML with no JS
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta><script></script></head><body>Hi</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0);
Ok(())
}

View file

@ -503,7 +503,7 @@ fn test_stringify_document_isolate_no_frames_no_js_no_css_no_images() {
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src \'unsafe-inline\' data:; style-src \'none\'; frame-src \'none\';child-src \'none\'; script-src \'none\'; img-src data:;\"></meta>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; frame-src 'none';child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
<title>no-frame no-css no-js no-image isolated document</title>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\

View file

@ -2,7 +2,7 @@ use crate::js::attr_is_event_handler;
#[test]
fn test_attr_is_event_handler() {
// succeeding
// passing
assert!(attr_is_event_handler("onBlur"));
assert!(attr_is_event_handler("onclick"));
assert!(attr_is_event_handler("onClick"));

View file

@ -1,3 +1,4 @@
mod cli;
mod html;
mod http;
mod js;

View file

@ -1,14 +1,14 @@
use crate::utils::{
clean_url, data_to_dataurl, detect_mimetype, is_data_url, is_valid_url, resolve_url,
url_has_protocol,
clean_url, data_to_data_url, data_url_to_text, detect_mimetype, is_data_url, is_http_url,
resolve_url, url_has_protocol,
};
use url::ParseError;
#[test]
fn test_data_to_dataurl() {
fn test_data_to_data_url() {
let mime = "application/javascript";
let data = "var word = 'hello';\nalert(word);\n";
let datauri = data_to_dataurl(mime, data.as_bytes());
let datauri = data_to_data_url(mime, data.as_bytes());
assert_eq!(
&datauri,
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
@ -43,7 +43,7 @@ fn test_detect_mimetype() {
#[test]
fn test_url_has_protocol() {
// succeeding
// passing
assert_eq!(
url_has_protocol("mailto:somebody@somewhere.com?subject=hello"),
true
@ -71,16 +71,16 @@ fn test_url_has_protocol() {
}
#[test]
fn test_is_valid_url() {
// succeeding
assert!(is_valid_url("https://www.rust-lang.org/"));
assert!(is_valid_url("http://kernel.org"));
fn test_is_http_url() {
// passing
assert!(is_http_url("https://www.rust-lang.org/"));
assert!(is_http_url("http://kernel.org"));
// failing
assert!(!is_valid_url("//kernel.org"));
assert!(!is_valid_url("./index.html"));
assert!(!is_valid_url("some-local-page.htm"));
assert!(!is_valid_url("ftp://1.2.3.4/www/index.html"));
assert!(!is_valid_url(
assert!(!is_http_url("//kernel.org"));
assert!(!is_http_url("./index.html"));
assert!(!is_http_url("some-local-page.htm"));
assert!(!is_http_url("ftp://1.2.3.4/www/index.html"));
assert!(!is_http_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
@ -144,20 +144,35 @@ fn test_resolve_url() -> Result<(), ParseError> {
"https://www.w3schools.com/html/default.asp"
);
let resolved_url = resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"https://www.kernel.org/category/signatures.html",
)?;
assert_eq!(
resolved_url.as_str(),
"https://www.kernel.org/category/signatures.html"
);
let resolved_url = resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"//www.w3schools.com/html/html_iframe.asp",
)
.unwrap_or(str!());
assert_eq!(resolved_url.as_str(), "");
Ok(())
}
#[test]
fn test_is_data_url() {
// succeeding
assert!(
is_data_url("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
.unwrap_or(false)
);
// passing
assert!(is_data_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
// failing
assert!(!is_data_url("https://kernel.org").unwrap_or(false));
assert!(!is_data_url("//kernel.org").unwrap_or(false));
assert!(!is_data_url("").unwrap_or(false));
assert!(!is_data_url("https://kernel.org"));
assert!(!is_data_url("//kernel.org"));
assert!(!is_data_url(""));
}
#[test]
@ -175,3 +190,25 @@ fn test_clean_url() {
"https://somewhere.com/font.eot"
);
}
#[test]
fn test_data_url_to_text() {
assert_eq!(
data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="),
"Work expands so as to fill the time available for its completion"
);
assert_eq!(
data_url_to_text(
"data:text/html;utf8,Work expands so as to fill the time available for its completion"
),
"Work expands so as to fill the time available for its completion"
);
assert_eq!(
data_url_to_text(
"data:text/html,Work expands so as to fill the time available for its completion"
),
"Work expands so as to fill the time available for its completion"
);
}

View file

@ -1,9 +1,9 @@
use crate::http::retrieve_asset;
use base64::encode;
use base64::{decode, encode};
use regex::Regex;
use reqwest::blocking::Client;
use std::collections::HashMap;
use url::{ParseError, Url};
use url::{form_urlencoded, ParseError, Url};
/// This monster of a regex is used to match any kind of URL found in CSS.
///
@ -37,8 +37,6 @@ use url::{ParseError, Url};
const CSS_URL_REGEX_STR: &str = r###"(?:(?:(?P<stylesheet>@import)|(?P<font>src\s*:))\s+)?url\((?P<to_repl>['"]?(?P<url>[^"'\)]+)['"]?)\)"###;
lazy_static! {
static ref HAS_PROTOCOL: Regex = Regex::new(r"^[a-z0-9]+:").unwrap();
static ref REGEX_URL: Regex = Regex::new(r"^https?://").unwrap();
static ref REGEX_CSS_URL: Regex = Regex::new(CSS_URL_REGEX_STR).unwrap();
}
@ -67,7 +65,7 @@ const MAGIC: [[&[u8]; 2]; 19] = [
[b"\x1A\x45\xDF\xA3", b"video/webm"],
];
pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
pub fn data_to_data_url(mime: &str, data: &[u8]) -> String {
let mimetype = if mime.is_empty() {
detect_mimetype(data)
} else {
@ -82,23 +80,29 @@ pub fn detect_mimetype(data: &[u8]) -> String {
return String::from_utf8(item[1].to_vec()).unwrap();
}
}
"".to_owned()
str!()
}
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
HAS_PROTOCOL.is_match(url.as_ref().to_lowercase().as_str())
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme().len() > 0))
.unwrap_or(false)
}
pub fn is_data_url<T: AsRef<str>>(url: T) -> Result<bool, ParseError> {
Url::parse(url.as_ref()).and_then(|u| Ok(u.scheme() == "data"))
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "data"))
.unwrap_or(false)
}
pub fn is_valid_url<T: AsRef<str>>(path: T) -> bool {
REGEX_URL.is_match(path.as_ref())
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
.unwrap_or(false)
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_valid_url(to.as_ref()) {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()
} else {
Url::parse(from.as_ref())?
@ -113,7 +117,7 @@ pub fn resolve_css_imports(
cache: &mut HashMap<String, String>,
client: &Client,
css_string: &str,
as_dataurl: bool,
as_data_url: bool,
href: &str,
opt_no_images: bool,
opt_silent: bool,
@ -150,7 +154,7 @@ pub fn resolve_css_imports(
cache,
client,
&content,
true, // Finally, convert to a dataurl
true, // Finally, convert to a data URL
&embedded_url,
opt_no_images,
opt_silent,
@ -188,8 +192,8 @@ pub fn resolve_css_imports(
resolved_css.replace_range(target_range, &replacement);
}
if as_dataurl {
data_to_dataurl("text/css", resolved_css.as_bytes())
if as_data_url {
data_to_data_url("text/css", resolved_css.as_bytes())
} else {
resolved_css
}
@ -205,3 +209,61 @@ pub fn clean_url<T: AsRef<str>>(url: T) -> String {
}
result.to_string()
}
pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("http://[::1]").unwrap());
let path: String = parsed_url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len());
if comma_loc == path.len() {
return str!();
}
let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
let data: String = form_urlencoded::parse(raw_data.as_bytes())
.map(|(key, val)| {
[
key.to_string(),
if val.to_string().len() == 0 {
str!()
} else {
str!('=')
},
val.to_string(),
]
.concat()
})
.collect();
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut mime_type: &str = "";
let mut encoding: &str = "";
let mut i: i8 = 0;
for item in &meta_data_items {
if i == 0 {
if item.eq_ignore_ascii_case("text/html") {
mime_type = item;
continue;
}
}
if item.eq_ignore_ascii_case("base64") || item.eq_ignore_ascii_case("utf8") {
encoding = item;
}
i = i + 1;
}
if mime_type.eq_ignore_ascii_case("text/html") {
if encoding.eq_ignore_ascii_case("base64") {
String::from_utf8(decode(&data).unwrap_or(vec![])).unwrap_or(str!())
} else {
data
}
} else {
str!()
}
}