cargo clippy
This commit is contained in:
parent
83e979307d
commit
14e9e6facc
@ -13,3 +13,5 @@ mime-sniffer = "0.1.2"
|
|||||||
regex = "1.2.1"
|
regex = "1.2.1"
|
||||||
reqwest = "0.9.20"
|
reqwest = "0.9.20"
|
||||||
url = "2.1.0"
|
url = "2.1.0"
|
||||||
|
lazy_static = "1.3.0"
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ If compared to saving websites with `wget -mpk`, `monolith` embeds all assets as
|
|||||||
### Installation
|
### Installation
|
||||||
$ git clone https://github.com/Y2Z/monolith.git
|
$ git clone https://github.com/Y2Z/monolith.git
|
||||||
$ cd monolith
|
$ cd monolith
|
||||||
$ cargo install
|
$ cargo install --path .
|
||||||
|
|
||||||
### Usage
|
### Usage
|
||||||
$ monolith https://lyrics.github.io/db/p/portishead/dummy/roads/ > portishead-roads-lyrics.html
|
$ monolith https://lyrics.github.io/db/p/portishead/dummy/roads/ > portishead-roads-lyrics.html
|
||||||
|
85
src/html.rs
85
src/html.rs
@ -1,13 +1,11 @@
|
|||||||
extern crate html5ever;
|
use http::{is_valid_url, resolve_url, retrieve_asset};
|
||||||
|
|
||||||
use std::default::Default;
|
use std::default::Default;
|
||||||
use std::io;
|
use std::io;
|
||||||
use http::{is_url, retrieve_asset, resolve_url};
|
|
||||||
|
|
||||||
use self::html5ever::parse_document;
|
use html5ever::parse_document;
|
||||||
use self::html5ever::rcdom::{Handle, NodeData, RcDom};
|
use html5ever::rcdom::{Handle, NodeData, RcDom};
|
||||||
use self::html5ever::tendril::TendrilSink;
|
use html5ever::serialize::{serialize, SerializeOpts};
|
||||||
use self::html5ever::serialize::{SerializeOpts, serialize};
|
use html5ever::tendril::TendrilSink;
|
||||||
|
|
||||||
enum NodeMatch {
|
enum NodeMatch {
|
||||||
Icon,
|
Icon,
|
||||||
@ -19,19 +17,37 @@ enum NodeMatch {
|
|||||||
Other,
|
Other,
|
||||||
}
|
}
|
||||||
|
|
||||||
static PNG_PIXEL: &str = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
|
const PNG_PIXEL: &str = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
|
||||||
|
|
||||||
static JS_DOM_EVENT_ATTRS: [&str; 21] = [
|
const JS_DOM_EVENT_ATTRS: [&str; 21] = [
|
||||||
// Input
|
// Input
|
||||||
"onfocus", "onblur", "onselect", "onchange", "onsubmit", "onreset", "onkeydown", "onkeypress", "onkeyup",
|
"onfocus",
|
||||||
|
"onblur",
|
||||||
|
"onselect",
|
||||||
|
"onchange",
|
||||||
|
"onsubmit",
|
||||||
|
"onreset",
|
||||||
|
"onkeydown",
|
||||||
|
"onkeypress",
|
||||||
|
"onkeyup",
|
||||||
// Mouse
|
// Mouse
|
||||||
"onmouseover", "onmouseout", "onmousedown", "onmouseup", "onmousemove",
|
"onmouseover",
|
||||||
|
"onmouseout",
|
||||||
|
"onmousedown",
|
||||||
|
"onmouseup",
|
||||||
|
"onmousemove",
|
||||||
// Click
|
// Click
|
||||||
"onclick", "ondblclick",
|
"onclick",
|
||||||
|
"ondblclick",
|
||||||
// Load
|
// Load
|
||||||
"onload", "onunload", "onabort", "onerror", "onresize",
|
"onload",
|
||||||
|
"onunload",
|
||||||
|
"onabort",
|
||||||
|
"onerror",
|
||||||
|
"onresize",
|
||||||
];
|
];
|
||||||
|
|
||||||
|
#[allow(clippy::cognitive_complexity)]
|
||||||
pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_images: bool) {
|
pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_images: bool) {
|
||||||
match node.data {
|
match node.data {
|
||||||
NodeData::Document => {
|
NodeData::Document => {
|
||||||
@ -39,28 +55,24 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_i
|
|||||||
for child in node.children.borrow().iter() {
|
for child in node.children.borrow().iter() {
|
||||||
walk_and_embed_assets(&url, child, opt_no_js, opt_no_images);
|
walk_and_embed_assets(&url, child, opt_no_js, opt_no_images);
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
|
|
||||||
NodeData::Doctype {
|
NodeData::Doctype { .. } => {}
|
||||||
name: _,
|
|
||||||
public_id: _,
|
|
||||||
system_id: _,
|
|
||||||
} => {},
|
|
||||||
|
|
||||||
NodeData::Text { contents: _, } => {},
|
NodeData::Text { .. } => {}
|
||||||
|
|
||||||
NodeData::Comment { contents: _, } => {
|
NodeData::Comment { .. } => {
|
||||||
// Note: in case of opt_no_js being set to true, there's no need to worry about
|
// Note: in case of opt_no_js being set to true, there's no need to worry about
|
||||||
// getting rid of comments that may contain scripts, e.g. <!--[if IE]><script>...
|
// getting rid of comments that may contain scripts, e.g. <!--[if IE]><script>...
|
||||||
// since that's not part of W3C standard and gets ignored by browsers other than IE [5, 9]
|
// since that's not part of W3C standard and gets ignored by browsers other than IE [5, 9]
|
||||||
},
|
}
|
||||||
|
|
||||||
NodeData::Element {
|
NodeData::Element {
|
||||||
ref name,
|
ref name,
|
||||||
ref attrs,
|
ref attrs,
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
let ref mut attrs_mut = attrs.borrow_mut();
|
let attrs_mut = &mut attrs.borrow_mut();
|
||||||
let mut found = NodeMatch::Other;
|
let mut found = NodeMatch::Other;
|
||||||
|
|
||||||
if &name.local == "link" {
|
if &name.local == "link" {
|
||||||
@ -95,7 +107,7 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_i
|
|||||||
attr.value.push_slice(favicon_datauri.unwrap().as_str());
|
attr.value.push_slice(favicon_datauri.unwrap().as_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
NodeMatch::Image => {
|
NodeMatch::Image => {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "src" {
|
if &attr.name.local == "src" {
|
||||||
@ -110,7 +122,7 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_i
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
NodeMatch::Anchor => {
|
NodeMatch::Anchor => {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "href" {
|
if &attr.name.local == "href" {
|
||||||
@ -124,17 +136,18 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_i
|
|||||||
attr.value.push_slice(href_full_url.unwrap().as_str());
|
attr.value.push_slice(href_full_url.unwrap().as_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
NodeMatch::StyleSheet => {
|
NodeMatch::StyleSheet => {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "href" {
|
if &attr.name.local == "href" {
|
||||||
let href_full_url = resolve_url(&url, &attr.value.to_string());
|
let href_full_url = resolve_url(&url, &attr.value.to_string());
|
||||||
let css_datauri = retrieve_asset(&href_full_url.unwrap(), true, "text/css");
|
let css_datauri =
|
||||||
|
retrieve_asset(&href_full_url.unwrap(), true, "text/css");
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(css_datauri.unwrap().as_str());
|
attr.value.push_slice(css_datauri.unwrap().as_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
NodeMatch::Script => {
|
NodeMatch::Script => {
|
||||||
if opt_no_js {
|
if opt_no_js {
|
||||||
// Get rid of src and inner content of SCRIPT tags
|
// Get rid of src and inner content of SCRIPT tags
|
||||||
@ -148,18 +161,22 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_i
|
|||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "src" {
|
if &attr.name.local == "src" {
|
||||||
let src_full_url = resolve_url(&url, &attr.value.to_string());
|
let src_full_url = resolve_url(&url, &attr.value.to_string());
|
||||||
let js_datauri = retrieve_asset(&src_full_url.unwrap(), true, "application/javascript");
|
let js_datauri = retrieve_asset(
|
||||||
|
&src_full_url.unwrap(),
|
||||||
|
true,
|
||||||
|
"application/javascript",
|
||||||
|
);
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(js_datauri.unwrap().as_str());
|
attr.value.push_slice(js_datauri.unwrap().as_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
NodeMatch::Form => {
|
NodeMatch::Form => {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "action" {
|
if &attr.name.local == "action" {
|
||||||
// Do not touch action props which are set to a URL
|
// Do not touch action props which are set to a URL
|
||||||
if is_url(&attr.value) {
|
if is_valid_url(&attr.value) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -168,8 +185,8 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_i
|
|||||||
attr.value.push_slice(href_full_url.unwrap().as_str());
|
attr.value.push_slice(href_full_url.unwrap().as_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
NodeMatch::Other => {},
|
NodeMatch::Other => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
if opt_no_js {
|
if opt_no_js {
|
||||||
@ -185,7 +202,7 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_i
|
|||||||
for child in node.children.borrow().iter() {
|
for child in node.children.borrow().iter() {
|
||||||
walk_and_embed_assets(&url, child, opt_no_js, opt_no_images);
|
walk_and_embed_assets(&url, child, opt_no_js, opt_no_images);
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
|
|
||||||
NodeData::ProcessingInstruction { .. } => unreachable!(),
|
NodeData::ProcessingInstruction { .. } => unreachable!(),
|
||||||
}
|
}
|
||||||
|
160
src/http.rs
160
src/http.rs
@ -1,53 +1,59 @@
|
|||||||
extern crate regex;
|
use regex::Regex;
|
||||||
extern crate reqwest;
|
use reqwest::header::CONTENT_TYPE;
|
||||||
extern crate url;
|
use reqwest::Client;
|
||||||
|
|
||||||
use self::regex::Regex;
|
|
||||||
use self::reqwest::Client;
|
|
||||||
use self::reqwest::header::CONTENT_TYPE;
|
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use self::url::{Url, ParseError};
|
use url::{ParseError, Url};
|
||||||
use utils::data_to_dataurl;
|
use utils::data_to_dataurl;
|
||||||
|
|
||||||
pub fn is_url(path: &str) -> bool {
|
lazy_static! {
|
||||||
let re = Regex::new(r"^https?://").unwrap();
|
static ref REGEX_URL: Regex = Regex::new(r"^https?://").unwrap();
|
||||||
re.is_match(path)
|
}
|
||||||
|
|
||||||
|
pub fn is_valid_url(path: &str) -> bool {
|
||||||
|
REGEX_URL.is_match(path)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn resolve_url(from: &str, to: &str) -> Result<String, ParseError> {
|
pub fn resolve_url(from: &str, to: &str) -> Result<String, ParseError> {
|
||||||
let mut result = String::new();
|
let result = if is_valid_url(to) {
|
||||||
|
// (anything, http://site.com/css/main.css)
|
||||||
if is_url(to) { // (anything, http://site.com/css/main.css)
|
to.to_string()
|
||||||
result = to.to_string();
|
|
||||||
} else {
|
} else {
|
||||||
if is_url(from) { // It's a remote resource (HTTP)
|
let mut re = String::new();
|
||||||
if to.chars().nth(0) == Some('/') { // (http://site.com/article/1, /...?)
|
if is_valid_url(from) {
|
||||||
|
// It's a remote resource (HTTP)
|
||||||
|
if to.chars().nth(0) == Some('/') {
|
||||||
|
// (http://site.com/article/1, /...?)
|
||||||
let from_url = Url::parse(from)?;
|
let from_url = Url::parse(from)?;
|
||||||
|
|
||||||
if to.chars().nth(1) == Some('/') { // (http://site.com/article/1, //images/1.png)
|
if to.chars().nth(1) == Some('/') {
|
||||||
result.push_str(from_url.scheme());
|
// (http://site.com/article/1, //images/1.png)
|
||||||
result.push_str(":");
|
re.push_str(from_url.scheme());
|
||||||
result.push_str(to);
|
re.push_str(":");
|
||||||
} else { // (http://site.com/article/1, /css/main.css)
|
re.push_str(to);
|
||||||
result.push_str(from_url.scheme());
|
} else {
|
||||||
result.push_str("://");
|
// (http://site.com/article/1, /css/main.css)
|
||||||
result.push_str(from_url.host_str().unwrap());
|
re.push_str(from_url.scheme());
|
||||||
result.push_str(to);
|
re.push_str("://");
|
||||||
|
re.push_str(from_url.host_str().unwrap());
|
||||||
|
re.push_str(to);
|
||||||
}
|
}
|
||||||
} else { // (http://site.com, css/main.css)
|
} else {
|
||||||
|
// (http://site.com, css/main.css)
|
||||||
// TODO improve to ensure no // or /// ever happen
|
// TODO improve to ensure no // or /// ever happen
|
||||||
result.push_str(from);
|
re.push_str(from);
|
||||||
result.push_str("/");
|
re.push_str("/");
|
||||||
result.push_str(to);
|
re.push_str(to);
|
||||||
}
|
}
|
||||||
} else { // It's a local resource (fs)
|
} else {
|
||||||
|
// It's a local resource (fs)
|
||||||
// TODO improve to ensure no // or /// ever happen
|
// TODO improve to ensure no // or /// ever happen
|
||||||
// TODO for fs use basepath instead of $from
|
// TODO for fs use basepath instead of $from
|
||||||
result.push_str(from);
|
re.push_str(from);
|
||||||
result.push_str("/");
|
re.push_str("/");
|
||||||
result.push_str(to);
|
re.push_str(to);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
re
|
||||||
|
};
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
@ -55,11 +61,15 @@ pub fn resolve_url(from: &str, to: &str) -> Result<String, ParseError> {
|
|||||||
pub fn url_is_data(url: &str) -> Result<bool, String> {
|
pub fn url_is_data(url: &str) -> Result<bool, String> {
|
||||||
match Url::parse(url) {
|
match Url::parse(url) {
|
||||||
Ok(parsed_url) => Ok(parsed_url.scheme() == "data"),
|
Ok(parsed_url) => Ok(parsed_url.scheme() == "data"),
|
||||||
Err(err) => return Err(format!("{}", err)),
|
Err(err) => Err(format!("{}", err)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn retrieve_asset(url: &str, as_dataurl: bool, as_mime: &str) -> Result<String, reqwest::Error> {
|
pub fn retrieve_asset(
|
||||||
|
url: &str,
|
||||||
|
as_dataurl: bool,
|
||||||
|
as_mime: &str,
|
||||||
|
) -> Result<String, reqwest::Error> {
|
||||||
if url_is_data(&url).unwrap() {
|
if url_is_data(&url).unwrap() {
|
||||||
Ok(url.to_string())
|
Ok(url.to_string())
|
||||||
} else {
|
} else {
|
||||||
@ -67,10 +77,7 @@ pub fn retrieve_asset(url: &str, as_dataurl: bool, as_mime: &str) -> Result<Stri
|
|||||||
.timeout(Duration::from_secs(10))
|
.timeout(Duration::from_secs(10))
|
||||||
.build()
|
.build()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let mut response = client
|
let mut response = client.get(url).send().unwrap();
|
||||||
.get(url)
|
|
||||||
.send()
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
if as_dataurl {
|
if as_dataurl {
|
||||||
// Convert response into a byte array
|
// Convert response into a byte array
|
||||||
@ -78,13 +85,15 @@ pub fn retrieve_asset(url: &str, as_dataurl: bool, as_mime: &str) -> Result<Stri
|
|||||||
response.copy_to(&mut data)?;
|
response.copy_to(&mut data)?;
|
||||||
|
|
||||||
// Attempt to obtain MIME type by reading the Content-Type header
|
// Attempt to obtain MIME type by reading the Content-Type header
|
||||||
let mut mimetype = as_mime;
|
let mimetype = if as_mime == "" {
|
||||||
if as_mime == "" {
|
response
|
||||||
mimetype = response.headers()
|
.headers()
|
||||||
.get(CONTENT_TYPE)
|
.get(CONTENT_TYPE)
|
||||||
.and_then(|header| header.to_str().ok())
|
.and_then(|header| header.to_str().ok())
|
||||||
.unwrap_or(&as_mime);
|
.unwrap_or(&as_mime)
|
||||||
}
|
} else {
|
||||||
|
as_mime
|
||||||
|
};
|
||||||
|
|
||||||
Ok(data_to_dataurl(&mimetype, &data))
|
Ok(data_to_dataurl(&mimetype, &data))
|
||||||
} else {
|
} else {
|
||||||
@ -99,37 +108,66 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_is_url() {
|
fn test_is_url() {
|
||||||
assert!(is_url("https://www.rust-lang.org/"));
|
assert!(is_valid_url("https://www.rust-lang.org/"));
|
||||||
assert!(is_url("http://kernel.org"));
|
assert!(is_valid_url("http://kernel.org"));
|
||||||
assert!(!is_url("./index.html"));
|
assert!(!is_valid_url("./index.html"));
|
||||||
assert!(!is_url("some-local-page.htm"));
|
assert!(!is_valid_url("some-local-page.htm"));
|
||||||
assert!(!is_url("ftp://1.2.3.4/www/index.html"));
|
assert!(!is_valid_url("ftp://1.2.3.4/www/index.html"));
|
||||||
assert!(!is_url("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"));
|
assert!(!is_valid_url(
|
||||||
|
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_resolve_url() -> Result<(), ParseError> {
|
fn test_resolve_url() -> Result<(), ParseError> {
|
||||||
let resolved_url = resolve_url("https://www.kernel.org", "../category/signatures.html")?;
|
let resolved_url = resolve_url("https://www.kernel.org", "../category/signatures.html")?;
|
||||||
assert_eq!(resolved_url.as_str(), "https://www.kernel.org/../category/signatures.html");
|
assert_eq!(
|
||||||
|
resolved_url.as_str(),
|
||||||
|
"https://www.kernel.org/../category/signatures.html"
|
||||||
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url("https://www.kernel.org", "category/signatures.html")?;
|
let resolved_url = resolve_url("https://www.kernel.org", "category/signatures.html")?;
|
||||||
assert_eq!(resolved_url.as_str(), "https://www.kernel.org/category/signatures.html");
|
assert_eq!(
|
||||||
|
resolved_url.as_str(),
|
||||||
|
"https://www.kernel.org/category/signatures.html"
|
||||||
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url("saved_page.htm", "https://www.kernel.org/category/signatures.html")?;
|
let resolved_url = resolve_url(
|
||||||
assert_eq!(resolved_url.as_str(), "https://www.kernel.org/category/signatures.html");
|
"saved_page.htm",
|
||||||
|
"https://www.kernel.org/category/signatures.html",
|
||||||
|
)?;
|
||||||
|
assert_eq!(
|
||||||
|
resolved_url.as_str(),
|
||||||
|
"https://www.kernel.org/category/signatures.html"
|
||||||
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url("https://www.kernel.org", "//www.kernel.org/theme/images/logos/tux.png")?;
|
let resolved_url = resolve_url(
|
||||||
assert_eq!(resolved_url.as_str(), "https://www.kernel.org/theme/images/logos/tux.png");
|
"https://www.kernel.org",
|
||||||
|
"//www.kernel.org/theme/images/logos/tux.png",
|
||||||
|
)?;
|
||||||
|
assert_eq!(
|
||||||
|
resolved_url.as_str(),
|
||||||
|
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||||
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url("https://www.kernel.org/category/signatures.html", "/theme/images/logos/tux.png")?;
|
let resolved_url = resolve_url(
|
||||||
assert_eq!(resolved_url.as_str(), "https://www.kernel.org/theme/images/logos/tux.png");
|
"https://www.kernel.org/category/signatures.html",
|
||||||
|
"/theme/images/logos/tux.png",
|
||||||
|
)?;
|
||||||
|
assert_eq!(
|
||||||
|
resolved_url.as_str(),
|
||||||
|
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||||
|
);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_url_is_data() {
|
fn test_url_is_data() {
|
||||||
assert!(url_is_data("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h").unwrap_or(false));
|
assert!(
|
||||||
|
url_is_data("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
|
||||||
|
.unwrap_or(false)
|
||||||
|
);
|
||||||
assert!(!url_is_data("https://kernel.org").unwrap_or(false));
|
assert!(!url_is_data("https://kernel.org").unwrap_or(false));
|
||||||
assert!(!url_is_data("//kernel.org").unwrap_or(false));
|
assert!(!url_is_data("//kernel.org").unwrap_or(false));
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,10 @@
|
|||||||
|
#[macro_use]
|
||||||
|
extern crate lazy_static;
|
||||||
|
extern crate html5ever;
|
||||||
|
extern crate regex;
|
||||||
|
extern crate reqwest;
|
||||||
|
extern crate url;
|
||||||
|
|
||||||
pub mod html;
|
pub mod html;
|
||||||
pub mod http;
|
pub mod http;
|
||||||
pub mod utils;
|
pub mod utils;
|
||||||
|
14
src/main.rs
14
src/main.rs
@ -2,20 +2,22 @@
|
|||||||
extern crate clap;
|
extern crate clap;
|
||||||
extern crate monolith;
|
extern crate monolith;
|
||||||
|
|
||||||
use clap::{Arg, App};
|
use clap::{App, Arg};
|
||||||
use monolith::http::{is_url, retrieve_asset};
|
use monolith::html::{html_to_dom, print_dom, walk_and_embed_assets};
|
||||||
use monolith::html::{walk_and_embed_assets, html_to_dom, print_dom};
|
use monolith::http::{is_valid_url, retrieve_asset};
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let command = App::new("monolith")
|
let command = App::new("monolith")
|
||||||
.version(crate_version!())
|
.version(crate_version!())
|
||||||
.author(crate_authors!())
|
.author(crate_authors!())
|
||||||
.about(crate_description!())
|
.about(crate_description!())
|
||||||
.arg(Arg::with_name("url")
|
.arg(
|
||||||
|
Arg::with_name("url")
|
||||||
.required(true)
|
.required(true)
|
||||||
.takes_value(true)
|
.takes_value(true)
|
||||||
.index(1)
|
.index(1)
|
||||||
.help("URL to download"))
|
.help("URL to download"),
|
||||||
|
)
|
||||||
.args_from_usage("-j, --no-js 'Excludes JavaScript'")
|
.args_from_usage("-j, --no-js 'Excludes JavaScript'")
|
||||||
.args_from_usage("-i, --no-images 'Removes images'")
|
.args_from_usage("-i, --no-images 'Removes images'")
|
||||||
.get_matches();
|
.get_matches();
|
||||||
@ -25,7 +27,7 @@ fn main() {
|
|||||||
let opt_no_js = command.is_present("no-js");
|
let opt_no_js = command.is_present("no-js");
|
||||||
let opt_no_img = command.is_present("no-images");
|
let opt_no_img = command.is_present("no-images");
|
||||||
|
|
||||||
if is_url(arg_target) {
|
if is_valid_url(arg_target) {
|
||||||
let data = retrieve_asset(&arg_target, false, "");
|
let data = retrieve_asset(&arg_target, false, "");
|
||||||
let dom = html_to_dom(&data.unwrap());
|
let dom = html_to_dom(&data.unwrap());
|
||||||
|
|
||||||
|
11
src/utils.rs
11
src/utils.rs
@ -5,14 +5,11 @@ use self::base64::encode;
|
|||||||
use self::mime_sniffer::MimeTypeSniffer;
|
use self::mime_sniffer::MimeTypeSniffer;
|
||||||
|
|
||||||
pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
|
pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
|
||||||
let mimetype: String;
|
let mimetype = if mime == "" {
|
||||||
|
detect_mimetype(data)
|
||||||
if mime == "" {
|
|
||||||
mimetype = detect_mimetype(data);
|
|
||||||
} else {
|
} else {
|
||||||
mimetype = mime.to_string();
|
mime.to_string()
|
||||||
}
|
};
|
||||||
|
|
||||||
format!("data:{};base64,{}", mimetype, encode(data))
|
format!("data:{};base64,{}", mimetype, encode(data))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user