Merge pull request #188 from snshn/metadata-tag-function
Move metadata tag code into a function
This commit is contained in:
commit
c3ca2ad1d5
4 changed files with 135 additions and 35 deletions
38
src/html.rs
38
src/html.rs
|
@ -1,4 +1,5 @@
|
||||||
use base64;
|
use base64;
|
||||||
|
use chrono::prelude::*;
|
||||||
use html5ever::interface::QualName;
|
use html5ever::interface::QualName;
|
||||||
use html5ever::parse_document;
|
use html5ever::parse_document;
|
||||||
use html5ever::rcdom::{Handle, NodeData, RcDom};
|
use html5ever::rcdom::{Handle, NodeData, RcDom};
|
||||||
|
@ -7,6 +8,7 @@ use html5ever::tendril::{format_tendril, Tendril, TendrilSink};
|
||||||
use html5ever::tree_builder::{Attribute, TreeSink};
|
use html5ever::tree_builder::{Attribute, TreeSink};
|
||||||
use html5ever::{local_name, namespace_url, ns};
|
use html5ever::{local_name, namespace_url, ns};
|
||||||
use reqwest::blocking::Client;
|
use reqwest::blocking::Client;
|
||||||
|
use reqwest::Url;
|
||||||
use sha2::{Digest, Sha256, Sha384, Sha512};
|
use sha2::{Digest, Sha256, Sha384, Sha512};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::default::Default;
|
use std::default::Default;
|
||||||
|
@ -1119,3 +1121,39 @@ pub fn stringify_document(
|
||||||
|
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn metadata_tag(url: &str) -> String {
|
||||||
|
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
||||||
|
|
||||||
|
// Safe to unwrap (we just put this through an HTTP request)
|
||||||
|
match Url::parse(url) {
|
||||||
|
Ok(mut clean_url) => {
|
||||||
|
clean_url.set_fragment(None);
|
||||||
|
|
||||||
|
// Prevent credentials from getting into metadata
|
||||||
|
if is_http_url(url) {
|
||||||
|
// Only HTTP(S) URLs may feature credentials
|
||||||
|
clean_url.set_username("").unwrap();
|
||||||
|
clean_url.set_password(None).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
if is_http_url(url) {
|
||||||
|
format!(
|
||||||
|
"<!-- Saved from {} at {} using {} v{} -->",
|
||||||
|
&clean_url,
|
||||||
|
timestamp,
|
||||||
|
env!("CARGO_PKG_NAME"),
|
||||||
|
env!("CARGO_PKG_VERSION"),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
"<!-- Saved from local source at {} using {} v{} -->",
|
||||||
|
timestamp,
|
||||||
|
env!("CARGO_PKG_NAME"),
|
||||||
|
env!("CARGO_PKG_VERSION"),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => str!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
49
src/main.rs
49
src/main.rs
|
@ -1,7 +1,5 @@
|
||||||
use chrono::prelude::*;
|
|
||||||
use reqwest::blocking::Client;
|
use reqwest::blocking::Client;
|
||||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||||
use reqwest::Url;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
|
@ -10,7 +8,7 @@ use std::path::Path;
|
||||||
use std::process;
|
use std::process;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
use monolith::html::{html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets};
|
||||||
use monolith::url::{data_url_to_data, is_data_url, is_file_url, is_http_url};
|
use monolith::url::{data_url_to_data, is_data_url, is_file_url, is_http_url};
|
||||||
use monolith::utils::retrieve_asset;
|
use monolith::utils::retrieve_asset;
|
||||||
|
|
||||||
|
@ -63,6 +61,7 @@ fn main() {
|
||||||
let mut target: String = str!(original_target.clone()).replace("\\", "/");
|
let mut target: String = str!(original_target.clone()).replace("\\", "/");
|
||||||
let path_is_relative: bool = path.is_relative();
|
let path_is_relative: bool = path.is_relative();
|
||||||
|
|
||||||
|
// Determine exact target URL
|
||||||
if target.clone().len() == 0 {
|
if target.clone().len() == 0 {
|
||||||
eprintln!("No target specified");
|
eprintln!("No target specified");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
|
@ -89,6 +88,7 @@ fn main() {
|
||||||
target_url = target.as_str();
|
target_url = target.as_str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Define output
|
||||||
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
|
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
|
||||||
|
|
||||||
// Initialize client
|
// Initialize client
|
||||||
|
@ -98,7 +98,6 @@ fn main() {
|
||||||
USER_AGENT,
|
USER_AGENT,
|
||||||
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
|
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
|
||||||
);
|
);
|
||||||
|
|
||||||
let timeout: u64 = if app_args.timeout > 0 {
|
let timeout: u64 = if app_args.timeout > 0 {
|
||||||
app_args.timeout
|
app_args.timeout
|
||||||
} else {
|
} else {
|
||||||
|
@ -111,7 +110,7 @@ fn main() {
|
||||||
.build()
|
.build()
|
||||||
.expect("Failed to initialize HTTP client");
|
.expect("Failed to initialize HTTP client");
|
||||||
|
|
||||||
// Retrieve root document
|
// Retrieve target document
|
||||||
if is_file_url(target_url) || is_http_url(target_url) {
|
if is_file_url(target_url) || is_http_url(target_url) {
|
||||||
match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) {
|
match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) {
|
||||||
Ok((data, final_url, _media_type)) => {
|
Ok((data, final_url, _media_type)) => {
|
||||||
|
@ -135,8 +134,7 @@ fn main() {
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
let time_saved = Utc::now();
|
// Embed remote assets
|
||||||
|
|
||||||
walk_and_embed_assets(
|
walk_and_embed_assets(
|
||||||
&mut cache,
|
&mut cache,
|
||||||
&client,
|
&client,
|
||||||
|
@ -150,7 +148,8 @@ fn main() {
|
||||||
app_args.silent,
|
app_args.silent,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut html: String = stringify_document(
|
// Serialize DOM tree
|
||||||
|
let mut result: String = stringify_document(
|
||||||
&dom.document,
|
&dom.document,
|
||||||
app_args.no_css,
|
app_args.no_css,
|
||||||
app_args.no_frames,
|
app_args.no_frames,
|
||||||
|
@ -159,37 +158,17 @@ fn main() {
|
||||||
app_args.isolate,
|
app_args.isolate,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Add metadata tag
|
||||||
if !app_args.no_metadata {
|
if !app_args.no_metadata {
|
||||||
// Safe to unwrap (we just put this through an HTTP request)
|
let metadata_comment = metadata_tag(&base_url);
|
||||||
let mut clean_url = Url::parse(&base_url).unwrap();
|
result.insert_str(0, &metadata_comment);
|
||||||
clean_url.set_fragment(None);
|
if metadata_comment.len() > 0 {
|
||||||
// Prevent credentials from getting into metadata
|
result.insert_str(metadata_comment.len(), "\n");
|
||||||
if is_http_url(&base_url) {
|
|
||||||
// Only HTTP(S) URLs may feature credentials
|
|
||||||
clean_url.set_username("").unwrap();
|
|
||||||
clean_url.set_password(None).unwrap();
|
|
||||||
}
|
}
|
||||||
let timestamp = time_saved.to_rfc3339_opts(SecondsFormat::Secs, true);
|
|
||||||
let metadata_comment = if is_http_url(&base_url) {
|
|
||||||
format!(
|
|
||||||
"<!-- Saved from {} at {} using {} v{} -->\n",
|
|
||||||
&clean_url,
|
|
||||||
timestamp,
|
|
||||||
env!("CARGO_PKG_NAME"),
|
|
||||||
env!("CARGO_PKG_VERSION"),
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
format!(
|
|
||||||
"<!-- Saved from local source at {} using {} v{} -->\n",
|
|
||||||
timestamp,
|
|
||||||
env!("CARGO_PKG_NAME"),
|
|
||||||
env!("CARGO_PKG_VERSION"),
|
|
||||||
)
|
|
||||||
};
|
|
||||||
html.insert_str(0, &metadata_comment);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Write result into stdout or file
|
||||||
output
|
output
|
||||||
.writeln_str(&html)
|
.writeln_str(&result)
|
||||||
.expect("Could not write HTML output");
|
.expect("Could not write HTML output");
|
||||||
}
|
}
|
||||||
|
|
82
src/tests/html/metadata_tag.rs
Normal file
82
src/tests/html/metadata_tag.rs
Normal file
|
@ -0,0 +1,82 @@
|
||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
mod passing {
    use chrono::prelude::*;

    use crate::html;

    /// Current UTC time, formatted the same way the tests expect the tag's
    /// timestamp to be formatted (RFC 3339, second precision).
    fn current_timestamp() -> String {
        Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true)
    }

    /// Calls `html::metadata_tag(url)` and checks the result against
    /// `expected`, which renders the expected tag for a given timestamp.
    ///
    /// The timestamp is sampled both before and after the call and either one
    /// is accepted, so the test does not fail spuriously when the clock rolls
    /// over to the next second while the tag is being generated.
    fn assert_metadata_tag(url: &str, expected: impl Fn(&str) -> String) {
        let before = current_timestamp();
        let metadata_comment: String = html::metadata_tag(url);
        let after = current_timestamp();

        assert!(
            metadata_comment == expected(&before) || metadata_comment == expected(&after),
            "unexpected metadata tag: {}",
            metadata_comment
        );
    }

    #[test]
    fn http_url() {
        let url = "http://192.168.1.1/";

        assert_metadata_tag(url, |timestamp| {
            format!(
                "<!-- Saved from {} at {} using {} v{} -->",
                url,
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        });
    }

    #[test]
    fn file_url() {
        let url = "file:///home/monolith/index.html";

        assert_metadata_tag(url, |timestamp| {
            format!(
                "<!-- Saved from local source at {} using {} v{} -->",
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        });
    }

    #[test]
    fn data_url() {
        let url = "data:text/html,Hello%2C%20World!";

        assert_metadata_tag(url, |timestamp| {
            format!(
                "<!-- Saved from local source at {} using {} v{} -->",
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        });
    }
}
|
||||||
|
|
||||||
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||||
|
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
mod failing {
    use crate::html::metadata_tag;

    /// An empty input is expected to produce an empty metadata tag.
    #[test]
    fn empty_string() {
        let tag = metadata_tag("");

        assert_eq!(tag, "");
    }
}
|
|
@ -2,5 +2,6 @@ mod embed_srcset;
|
||||||
mod get_node_name;
|
mod get_node_name;
|
||||||
mod has_proper_integrity;
|
mod has_proper_integrity;
|
||||||
mod is_icon;
|
mod is_icon;
|
||||||
|
mod metadata_tag;
|
||||||
mod stringify_document;
|
mod stringify_document;
|
||||||
mod walk_and_embed_assets;
|
mod walk_and_embed_assets;
|
||||||
|
|
Loading…
Reference in a new issue