Made merge compatible with Y2Z/master
This commit is contained in:
commit
d3956a7905
12 changed files with 1931 additions and 136 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -2,10 +2,6 @@
|
|||
# will have compiled files and executables
|
||||
/target/
|
||||
|
||||
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
|
||||
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
|
||||
Cargo.lock
|
||||
|
||||
# These are backup files generated by rustfmt
|
||||
**/*.rs.bk
|
||||
|
||||
|
|
|
@ -9,13 +9,13 @@ os:
|
|||
|
||||
rust:
|
||||
- stable
|
||||
- beta
|
||||
- beta
|
||||
- nightly
|
||||
|
||||
before_script:
|
||||
- rustup component add rustfmt
|
||||
|
||||
script:
|
||||
- cargo build --verbose
|
||||
- cargo test --verbose
|
||||
- cargo build --locked --verbose
|
||||
- cargo test --locked --verbose
|
||||
- cargo fmt --all -- --check
|
||||
|
|
1724
Cargo.lock
generated
Normal file
1724
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,9 +1,10 @@
|
|||
[package]
|
||||
name = "monolith"
|
||||
version = "2.0.20"
|
||||
version = "2.0.23"
|
||||
authors = [
|
||||
"Sunshine <sunshine@uberspace.net>",
|
||||
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
|
||||
"Emmanuel Delaborde <th3rac25@gmail.com>",
|
||||
]
|
||||
description = "CLI tool for saving web pages as a single HTML file"
|
||||
|
||||
|
|
6
Makefile
6
Makefile
|
@ -3,13 +3,13 @@
|
|||
all: test build
|
||||
|
||||
build:
|
||||
@cargo build
|
||||
@cargo build --locked
|
||||
|
||||
install:
|
||||
@cargo install --force --path .
|
||||
@cargo install --force --locked --path .
|
||||
|
||||
test:
|
||||
@cargo test
|
||||
@cargo test --locked
|
||||
@cargo fmt --all -- --check
|
||||
|
||||
lint:
|
||||
|
|
62
src/args.rs
Normal file
62
src/args.rs
Normal file
|
@ -0,0 +1,62 @@
|
|||
use clap::{App, Arg};
|
||||
|
||||
/// Command-line options for a single `monolith` run.
///
/// Every field mirrors one argument declared in `AppArgs::get`. The
/// `Default` value is an empty URL, an empty User-Agent and all flags off.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AppArgs {
    /// Positional `url` argument: the URL to download.
    pub url_target: String,
    /// `-c, --no-css`: ignore styles.
    pub no_css: bool,
    /// `-f, --no-frames`: exclude iframes.
    pub no_frames: bool,
    /// `-i, --no-images`: remove images.
    pub no_images: bool,
    /// `-j, --no-js`: exclude JavaScript.
    pub no_js: bool,
    /// `-k, --insecure`: accept invalid X.509 (TLS) certificates.
    pub insecure: bool,
    /// `-I, --isolate`: cut the saved document off from the Internet.
    pub isolate: bool,
    /// `-s, --silent`: suppress verbosity.
    pub silent: bool,
    /// `-u, --user-agent`: custom User-Agent string.
    pub user_agent: String,
}
|
||||
|
||||
const DEFAULT_USER_AGENT: &str =
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0";
|
||||
|
||||
impl AppArgs {
|
||||
pub fn get() -> AppArgs {
|
||||
let app = App::new("monolith")
|
||||
.version(crate_version!())
|
||||
.author(crate_authors!("\n"))
|
||||
.about(crate_description!())
|
||||
.arg(
|
||||
Arg::with_name("url")
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.index(1)
|
||||
.help("URL to download"),
|
||||
)
|
||||
// .args_from_usage("-a, --include-audio 'Embed audio sources'")
|
||||
.args_from_usage("-c, --no-css 'Ignore styles'")
|
||||
.args_from_usage("-f, --no-frames 'Exclude iframes'")
|
||||
.args_from_usage("-i, --no-images 'Remove images'")
|
||||
.args_from_usage("-I, --isolate 'Cut off from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Exclude JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Accept invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-s, --silent 'Suppress verbosity'")
|
||||
.args_from_usage("-u, --user-agent=[Iceweasel] 'Custom User-Agent string'")
|
||||
// .args_from_usage("-v, --include-video 'Embed video sources'")
|
||||
.get_matches();
|
||||
let mut app_args = AppArgs::default();
|
||||
// Process the command
|
||||
app_args.url_target = app
|
||||
.value_of("url")
|
||||
.expect("please set target url")
|
||||
.to_string();
|
||||
app_args.no_css = app.is_present("no-css");
|
||||
app_args.no_frames = app.is_present("no-frames");
|
||||
app_args.no_images = app.is_present("no-images");
|
||||
app_args.no_js = app.is_present("no-js");
|
||||
app_args.insecure = app.is_present("insecure");
|
||||
app_args.isolate = app.is_present("isolate");
|
||||
app_args.silent = app.is_present("silent");
|
||||
app_args.user_agent = app
|
||||
.value_of("user-agent")
|
||||
.unwrap_or_else(|| DEFAULT_USER_AGENT)
|
||||
.to_string();
|
||||
app_args
|
||||
}
|
||||
}
|
13
src/html.rs
13
src/html.rs
|
@ -7,6 +7,7 @@ use html5ever::tree_builder::{Attribute, TreeSink};
|
|||
use html5ever::{local_name, namespace_url, ns};
|
||||
use http::retrieve_asset;
|
||||
use js::attr_is_event_handler;
|
||||
use std::collections::HashMap;
|
||||
use std::default::Default;
|
||||
use utils::{data_to_dataurl, is_valid_url, resolve_css_imports, resolve_url, url_has_protocol};
|
||||
|
||||
|
@ -43,6 +44,7 @@ pub fn is_icon(attr_value: &str) -> bool {
|
|||
}
|
||||
|
||||
pub fn walk_and_embed_assets(
|
||||
cache: &mut HashMap<String, String>,
|
||||
url: &str,
|
||||
node: &Handle,
|
||||
opt_no_css: bool,
|
||||
|
@ -58,6 +60,7 @@ pub fn walk_and_embed_assets(
|
|||
// Dig deeper
|
||||
for child in node.children.borrow().iter() {
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
&url,
|
||||
child,
|
||||
opt_no_css,
|
||||
|
@ -103,6 +106,7 @@ pub fn walk_and_embed_assets(
|
|||
resolve_url(&url, &attr.value.to_string())
|
||||
.unwrap_or(EMPTY_STRING.clone());
|
||||
let (favicon_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
&href_full_url,
|
||||
true,
|
||||
"",
|
||||
|
@ -126,6 +130,7 @@ pub fn walk_and_embed_assets(
|
|||
resolve_url(&url, &attr.value.to_string())
|
||||
.unwrap_or(EMPTY_STRING.clone());
|
||||
let (css_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
&href_full_url,
|
||||
false,
|
||||
"text/css",
|
||||
|
@ -137,6 +142,7 @@ pub fn walk_and_embed_assets(
|
|||
attr.value.clear();
|
||||
|
||||
let css_resolved = resolve_css_imports(
|
||||
cache,
|
||||
&css_dataurl,
|
||||
&href_full_url,
|
||||
opt_user_agent,
|
||||
|
@ -177,6 +183,7 @@ pub fn walk_and_embed_assets(
|
|||
let src_full_url: String =
|
||||
resolve_url(&url, &value).unwrap_or(EMPTY_STRING.clone());
|
||||
let (img_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
&src_full_url,
|
||||
true,
|
||||
"",
|
||||
|
@ -210,6 +217,7 @@ pub fn walk_and_embed_assets(
|
|||
resolve_url(&url, &attr.value.to_string())
|
||||
.unwrap_or(EMPTY_STRING.clone());
|
||||
let (source_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
&srcset_full_url,
|
||||
true,
|
||||
"",
|
||||
|
@ -256,6 +264,7 @@ pub fn walk_and_embed_assets(
|
|||
resolve_url(&url, &attr.value.to_string())
|
||||
.unwrap_or(EMPTY_STRING.clone());
|
||||
let (js_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
&src_full_url,
|
||||
true,
|
||||
"application/javascript",
|
||||
|
@ -309,6 +318,7 @@ pub fn walk_and_embed_assets(
|
|||
let src_full_url: String =
|
||||
resolve_url(&url, &iframe_src).unwrap_or(EMPTY_STRING.clone());
|
||||
let (iframe_data, iframe_final_url) = retrieve_asset(
|
||||
cache,
|
||||
&src_full_url,
|
||||
false,
|
||||
"text/html",
|
||||
|
@ -319,6 +329,7 @@ pub fn walk_and_embed_assets(
|
|||
.unwrap_or((EMPTY_STRING.clone(), src_full_url));
|
||||
let dom = html_to_dom(&iframe_data);
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
&iframe_final_url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
|
@ -353,6 +364,7 @@ pub fn walk_and_embed_assets(
|
|||
let poster_full_url: String = resolve_url(&url, &video_poster)
|
||||
.unwrap_or(EMPTY_STRING.clone());
|
||||
let (poster_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
&poster_full_url,
|
||||
true,
|
||||
"",
|
||||
|
@ -401,6 +413,7 @@ pub fn walk_and_embed_assets(
|
|||
// Dig deeper
|
||||
for child in node.children.borrow().iter() {
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
&url,
|
||||
child,
|
||||
opt_no_css,
|
||||
|
|
91
src/http.rs
91
src/http.rs
|
@ -1,9 +1,11 @@
|
|||
use reqwest::header::{CONTENT_TYPE, USER_AGENT};
|
||||
use reqwest::Client;
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
use utils::{data_to_dataurl, is_data_url};
|
||||
|
||||
pub fn retrieve_asset(
|
||||
cache: &mut HashMap<String, String>,
|
||||
url: &str,
|
||||
as_dataurl: bool,
|
||||
mime: &str,
|
||||
|
@ -14,53 +16,54 @@ pub fn retrieve_asset(
|
|||
if is_data_url(&url).unwrap() {
|
||||
Ok((url.to_string(), url.to_string()))
|
||||
} else {
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(10))
|
||||
.danger_accept_invalid_certs(opt_insecure)
|
||||
.build()?;
|
||||
let mut response = client.get(url).header(USER_AGENT, opt_user_agent).send()?;
|
||||
|
||||
if !opt_silent {
|
||||
if url == response.url().as_str() {
|
||||
eprintln!("[ {} ]", &url);
|
||||
} else {
|
||||
eprintln!("[ {} -> {} ]", &url, &response.url().as_str());
|
||||
if cache.contains_key(&url.to_string()) {
|
||||
// url is in cache
|
||||
if !opt_silent {
|
||||
eprintln!("[ {} ] (from cache)", &url);
|
||||
}
|
||||
}
|
||||
|
||||
if as_dataurl {
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data)?;
|
||||
|
||||
// Attempt to obtain MIME type by reading the Content-Type header
|
||||
let mimetype = if mime == "" {
|
||||
response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or(&mime)
|
||||
} else {
|
||||
mime
|
||||
};
|
||||
|
||||
Ok((
|
||||
if response.status() != 200 {
|
||||
"".to_string()
|
||||
} else {
|
||||
data_to_dataurl(&mimetype, &data)
|
||||
},
|
||||
response.url().to_string(),
|
||||
))
|
||||
let data = cache.get(&url.to_string()).unwrap();
|
||||
Ok((data.to_string(), url.to_string()))
|
||||
} else {
|
||||
Ok((
|
||||
if response.status() != 200 {
|
||||
"".to_string()
|
||||
// url not in cache, we request it
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(10))
|
||||
.danger_accept_invalid_certs(opt_insecure)
|
||||
.build()?;
|
||||
let mut response = client.get(url).header(USER_AGENT, opt_user_agent).send()?;
|
||||
|
||||
if !opt_silent {
|
||||
if url == response.url().as_str() {
|
||||
eprintln!("[ {} ]", &url);
|
||||
} else {
|
||||
response.text().unwrap()
|
||||
},
|
||||
response.url().to_string(),
|
||||
))
|
||||
eprintln!("[ {} -> {} ]", &url, &response.url().as_str());
|
||||
}
|
||||
}
|
||||
|
||||
if as_dataurl {
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data)?;
|
||||
|
||||
// Attempt to obtain MIME type by reading the Content-Type header
|
||||
let mimetype = if mime == "" {
|
||||
response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or(&mime)
|
||||
} else {
|
||||
mime
|
||||
};
|
||||
let dataurl = data_to_dataurl(&mimetype, &data);
|
||||
// insert in cache
|
||||
cache.insert(response.url().to_string(), dataurl.to_string());
|
||||
Ok((dataurl, response.url().to_string()))
|
||||
} else {
|
||||
let content = response.text().unwrap();
|
||||
// insert in cache
|
||||
cache.insert(response.url().to_string(), content.clone());
|
||||
Ok((content, response.url().to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
80
src/main.rs
80
src/main.rs
|
@ -2,80 +2,50 @@
|
|||
extern crate clap;
|
||||
extern crate monolith;
|
||||
|
||||
use clap::{App, Arg};
|
||||
mod args;
|
||||
|
||||
use args::AppArgs;
|
||||
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
||||
use monolith::http::retrieve_asset;
|
||||
use monolith::utils::is_valid_url;
|
||||
|
||||
const DEFAULT_USER_AGENT: &str =
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0";
|
||||
use std::collections::HashMap;
|
||||
|
||||
fn main() {
|
||||
let command = App::new("monolith")
|
||||
.version(crate_version!())
|
||||
.author(crate_authors!("\n"))
|
||||
.about(crate_description!())
|
||||
.arg(
|
||||
Arg::with_name("url")
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.index(1)
|
||||
.help("URL to download"),
|
||||
)
|
||||
// .args_from_usage("-a, --include-audio 'Embed audio sources'")
|
||||
.args_from_usage("-c, --no-css 'Ignore styles'")
|
||||
.args_from_usage("-f, --no-frames 'Exclude iframes'")
|
||||
.args_from_usage("-i, --no-images 'Remove images'")
|
||||
.args_from_usage("-I, --isolate 'Cut off from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Exclude JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Accept invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-s, --silent 'Suppress verbosity'")
|
||||
.args_from_usage("-u, --user-agent=[Iceweasel] 'Custom User-Agent string'")
|
||||
// .args_from_usage("-v, --include-video 'Embed video sources'")
|
||||
.get_matches();
|
||||
|
||||
// Process the command
|
||||
let arg_target: &str = command.value_of("url").unwrap();
|
||||
let opt_no_css: bool = command.is_present("no-css");
|
||||
let opt_no_frames: bool = command.is_present("no-frames");
|
||||
let opt_no_images: bool = command.is_present("no-images");
|
||||
let opt_no_js: bool = command.is_present("no-js");
|
||||
let opt_insecure: bool = command.is_present("insecure");
|
||||
let opt_isolate: bool = command.is_present("isolate");
|
||||
let opt_silent: bool = command.is_present("silent");
|
||||
let opt_user_agent: &str = command.value_of("user-agent").unwrap_or(DEFAULT_USER_AGENT);
|
||||
|
||||
if is_valid_url(arg_target) {
|
||||
let app_args = AppArgs::get();
|
||||
let cache = &mut HashMap::new();
|
||||
if is_valid_url(app_args.url_target.as_str()) {
|
||||
let (data, final_url) = retrieve_asset(
|
||||
&arg_target,
|
||||
cache,
|
||||
app_args.url_target.as_str(),
|
||||
false,
|
||||
"",
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
app_args.user_agent.as_str(),
|
||||
app_args.silent,
|
||||
app_args.insecure,
|
||||
)
|
||||
.unwrap();
|
||||
let dom = html_to_dom(&data);
|
||||
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
&final_url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
opt_no_frames,
|
||||
app_args.no_css,
|
||||
app_args.no_js,
|
||||
app_args.no_images,
|
||||
app_args.user_agent.as_str(),
|
||||
app_args.silent,
|
||||
app_args.insecure,
|
||||
app_args.no_frames,
|
||||
);
|
||||
|
||||
let html: String = stringify_document(
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_isolate,
|
||||
app_args.no_css,
|
||||
app_args.no_frames,
|
||||
app_args.no_js,
|
||||
app_args.no_images,
|
||||
app_args.isolate,
|
||||
);
|
||||
|
||||
println!("{}", html);
|
||||
|
|
|
@ -3,6 +3,7 @@ use crate::html::{
|
|||
};
|
||||
use html5ever::rcdom::{Handle, NodeData};
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[test]
|
||||
fn test_is_icon() {
|
||||
|
@ -58,6 +59,8 @@ fn test_get_parent_node_name() {
|
|||
|
||||
#[test]
|
||||
fn test_walk_and_embed_assets() {
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let html = "<div><P></P></div>";
|
||||
let dom = html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
|
@ -70,6 +73,7 @@ fn test_walk_and_embed_assets() {
|
|||
let opt_insecure = false;
|
||||
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
|
@ -95,6 +99,7 @@ fn test_walk_and_embed_assets_ensure_no_recursive_iframe() {
|
|||
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
|
||||
let dom = html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
|
@ -104,6 +109,7 @@ fn test_walk_and_embed_assets_ensure_no_recursive_iframe() {
|
|||
let opt_insecure = false;
|
||||
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
|
@ -131,6 +137,7 @@ fn test_walk_and_embed_assets_no_css() {
|
|||
<div style=\"display: none;\"></div>";
|
||||
let dom = html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = true;
|
||||
let opt_no_frames: bool = false;
|
||||
|
@ -140,6 +147,7 @@ fn test_walk_and_embed_assets_no_css() {
|
|||
let opt_insecure = false;
|
||||
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
|
@ -174,6 +182,7 @@ fn test_walk_and_embed_assets_no_images() {
|
|||
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
|
||||
let dom = html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
|
@ -183,6 +192,7 @@ fn test_walk_and_embed_assets_no_images() {
|
|||
let opt_insecure = false;
|
||||
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
|
@ -219,6 +229,7 @@ fn test_walk_and_embed_assets_no_frames() {
|
|||
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
|
||||
let dom = html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_frames: bool = true;
|
||||
|
@ -228,6 +239,7 @@ fn test_walk_and_embed_assets_no_frames() {
|
|||
let opt_insecure = false;
|
||||
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
|
@ -256,6 +268,7 @@ fn test_walk_and_embed_assets_no_js() {
|
|||
</div>";
|
||||
let dom = html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
|
@ -265,6 +278,7 @@ fn test_walk_and_embed_assets_no_js() {
|
|||
let opt_insecure = false;
|
||||
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
|
|
|
@ -1,13 +1,23 @@
|
|||
use crate::http::retrieve_asset;
|
||||
|
||||
use std::collections::HashMap;
|
||||
#[test]
|
||||
fn test_retrieve_asset() {
|
||||
let (data, final_url) =
|
||||
retrieve_asset("data:text/html;base64,...", true, "", "", true, false).unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
let (data, final_url) = retrieve_asset(
|
||||
cache,
|
||||
"data:text/html;base64,...",
|
||||
true,
|
||||
"",
|
||||
"",
|
||||
true,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(&data, "data:text/html;base64,...");
|
||||
assert_eq!(&final_url, "data:text/html;base64,...");
|
||||
|
||||
let (data, final_url) = retrieve_asset(
|
||||
cache,
|
||||
"data:text/html;base64,...",
|
||||
true,
|
||||
"image/png",
|
||||
|
|
48
src/utils.rs
48
src/utils.rs
|
@ -4,7 +4,10 @@ use self::base64::encode;
|
|||
use http::retrieve_asset;
|
||||
use regex::Regex;
|
||||
use url::{ParseError, Url};
|
||||
use std::io::{stderr, Write};
|
||||
use std::{
|
||||
io::{stderr, Write},
|
||||
collections::HashMap,
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
static ref HAS_PROTOCOL: Regex = Regex::new(r"^[a-z0-9]+:").unwrap();
|
||||
|
@ -12,7 +15,7 @@ lazy_static! {
|
|||
static ref EMPTY_STRING: String = String::new();
|
||||
}
|
||||
|
||||
static MAGIC: [[&[u8]; 2]; 19] = [
|
||||
const MAGIC: [[&[u8]; 2]; 19] = [
|
||||
// Image
|
||||
[b"GIF87a", b"image/gif"],
|
||||
[b"GIF89a", b"image/gif"],
|
||||
|
@ -38,7 +41,7 @@ static MAGIC: [[&[u8]; 2]; 19] = [
|
|||
];
|
||||
|
||||
pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
|
||||
let mimetype = if mime == "" {
|
||||
let mimetype = if mime.is_empty() {
|
||||
detect_mimetype(data)
|
||||
} else {
|
||||
mime.to_string()
|
||||
|
@ -47,44 +50,40 @@ pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
|
|||
}
|
||||
|
||||
pub fn detect_mimetype(data: &[u8]) -> String {
|
||||
let mut re = String::new();
|
||||
|
||||
for item in MAGIC.iter() {
|
||||
if data.starts_with(item[0]) {
|
||||
re = String::from_utf8(item[1].to_vec()).unwrap();
|
||||
break;
|
||||
return String::from_utf8(item[1].to_vec()).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
re
|
||||
"".to_owned()
|
||||
}
|
||||
|
||||
pub fn url_has_protocol(url: &str) -> bool {
|
||||
HAS_PROTOCOL.is_match(&url.to_lowercase())
|
||||
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
|
||||
HAS_PROTOCOL.is_match(url.as_ref().to_lowercase().as_str())
|
||||
}
|
||||
|
||||
pub fn is_data_url(url: &str) -> Result<bool, ParseError> {
|
||||
match Url::parse(url) {
|
||||
Ok(parsed_url) => Ok(parsed_url.scheme() == "data"),
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
pub fn is_data_url<T: AsRef<str>>(url: T) -> Result<bool, ParseError> {
|
||||
Url::parse(url.as_ref()).and_then(|u| Ok(u.scheme() == "data"))
|
||||
}
|
||||
|
||||
pub fn is_valid_url(path: &str) -> bool {
|
||||
REGEX_URL.is_match(path)
|
||||
pub fn is_valid_url<T: AsRef<str>>(path: T) -> bool {
|
||||
REGEX_URL.is_match(path.as_ref())
|
||||
}
|
||||
|
||||
pub fn resolve_url(from: &str, to: &str) -> Result<String, ParseError> {
|
||||
let result = if is_valid_url(to) {
|
||||
to.to_string()
|
||||
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
|
||||
let result = if is_valid_url(to.as_ref()) {
|
||||
to.as_ref().to_string()
|
||||
} else {
|
||||
Url::parse(from)?.join(to)?.to_string()
|
||||
Url::parse(from.as_ref())?
|
||||
.join(to.as_ref())?
|
||||
.as_ref()
|
||||
.to_string()
|
||||
};
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn resolve_css_imports(
|
||||
cache: &mut HashMap<String, String>,
|
||||
css_string: &str,
|
||||
href: &str,
|
||||
opt_user_agent: &str,
|
||||
|
@ -108,6 +107,7 @@ pub fn resolve_css_imports(
|
|||
|
||||
// The link is an @import link
|
||||
Some(_) => retrieve_asset(
|
||||
cache,
|
||||
&embedded_url,
|
||||
false, // Formatting as data URL will be done later
|
||||
"text/css", // Expect CSS
|
||||
|
@ -116,6 +116,7 @@ pub fn resolve_css_imports(
|
|||
opt_insecure,
|
||||
)
|
||||
.map(|(content, _)| resolve_css_imports(
|
||||
cache,
|
||||
&content,
|
||||
&embedded_url,
|
||||
opt_user_agent,
|
||||
|
@ -125,6 +126,7 @@ pub fn resolve_css_imports(
|
|||
|
||||
// The link is some other, non-@import link
|
||||
None => retrieve_asset(
|
||||
cache,
|
||||
&embedded_url,
|
||||
true, // Format as data URL
|
||||
"", // Unknown MIME type
|
||||
|
|
Loading…
Reference in a new issue