Merge pull request #188 from snshn/metadata-tag-function
Move metadata tag code into a function
This commit is contained in:
commit
c3ca2ad1d5
4 changed files with 135 additions and 35 deletions
38
src/html.rs
38
src/html.rs
|
@ -1,4 +1,5 @@
|
|||
use base64;
|
||||
use chrono::prelude::*;
|
||||
use html5ever::interface::QualName;
|
||||
use html5ever::parse_document;
|
||||
use html5ever::rcdom::{Handle, NodeData, RcDom};
|
||||
|
@ -7,6 +8,7 @@ use html5ever::tendril::{format_tendril, Tendril, TendrilSink};
|
|||
use html5ever::tree_builder::{Attribute, TreeSink};
|
||||
use html5ever::{local_name, namespace_url, ns};
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::Url;
|
||||
use sha2::{Digest, Sha256, Sha384, Sha512};
|
||||
use std::collections::HashMap;
|
||||
use std::default::Default;
|
||||
|
@ -1119,3 +1121,39 @@ pub fn stringify_document(
|
|||
|
||||
result
|
||||
}
|
||||
|
||||
pub fn metadata_tag(url: &str) -> String {
|
||||
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
||||
|
||||
// Safe to unwrap (we just put this through an HTTP request)
|
||||
match Url::parse(url) {
|
||||
Ok(mut clean_url) => {
|
||||
clean_url.set_fragment(None);
|
||||
|
||||
// Prevent credentials from getting into metadata
|
||||
if is_http_url(url) {
|
||||
// Only HTTP(S) URLs may feature credentials
|
||||
clean_url.set_username("").unwrap();
|
||||
clean_url.set_password(None).unwrap();
|
||||
}
|
||||
|
||||
if is_http_url(url) {
|
||||
format!(
|
||||
"<!-- Saved from {} at {} using {} v{} -->",
|
||||
&clean_url,
|
||||
timestamp,
|
||||
env!("CARGO_PKG_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"<!-- Saved from local source at {} using {} v{} -->",
|
||||
timestamp,
|
||||
env!("CARGO_PKG_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
)
|
||||
}
|
||||
}
|
||||
Err(_) => str!(),
|
||||
}
|
||||
}
|
||||
|
|
49
src/main.rs
49
src/main.rs
|
@ -1,7 +1,5 @@
|
|||
use chrono::prelude::*;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||
use reqwest::Url;
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
|
@ -10,7 +8,7 @@ use std::path::Path;
|
|||
use std::process;
|
||||
use std::time::Duration;
|
||||
|
||||
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
||||
use monolith::html::{html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets};
|
||||
use monolith::url::{data_url_to_data, is_data_url, is_file_url, is_http_url};
|
||||
use monolith::utils::retrieve_asset;
|
||||
|
||||
|
@ -63,6 +61,7 @@ fn main() {
|
|||
let mut target: String = str!(original_target.clone()).replace("\\", "/");
|
||||
let path_is_relative: bool = path.is_relative();
|
||||
|
||||
// Determine exact target URL
|
||||
if target.clone().len() == 0 {
|
||||
eprintln!("No target specified");
|
||||
process::exit(1);
|
||||
|
@ -89,6 +88,7 @@ fn main() {
|
|||
target_url = target.as_str();
|
||||
}
|
||||
|
||||
// Define output
|
||||
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
|
||||
|
||||
// Initialize client
|
||||
|
@ -98,7 +98,6 @@ fn main() {
|
|||
USER_AGENT,
|
||||
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
|
||||
);
|
||||
|
||||
let timeout: u64 = if app_args.timeout > 0 {
|
||||
app_args.timeout
|
||||
} else {
|
||||
|
@ -111,7 +110,7 @@ fn main() {
|
|||
.build()
|
||||
.expect("Failed to initialize HTTP client");
|
||||
|
||||
// Retrieve root document
|
||||
// Retrieve target document
|
||||
if is_file_url(target_url) || is_http_url(target_url) {
|
||||
match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) {
|
||||
Ok((data, final_url, _media_type)) => {
|
||||
|
@ -135,8 +134,7 @@ fn main() {
|
|||
process::exit(1);
|
||||
}
|
||||
|
||||
let time_saved = Utc::now();
|
||||
|
||||
// Embed remote assets
|
||||
walk_and_embed_assets(
|
||||
&mut cache,
|
||||
&client,
|
||||
|
@ -150,7 +148,8 @@ fn main() {
|
|||
app_args.silent,
|
||||
);
|
||||
|
||||
let mut html: String = stringify_document(
|
||||
// Serialize DOM tree
|
||||
let mut result: String = stringify_document(
|
||||
&dom.document,
|
||||
app_args.no_css,
|
||||
app_args.no_frames,
|
||||
|
@ -159,37 +158,17 @@ fn main() {
|
|||
app_args.isolate,
|
||||
);
|
||||
|
||||
// Add metadata tag
|
||||
if !app_args.no_metadata {
|
||||
// Safe to unwrap (we just put this through an HTTP request)
|
||||
let mut clean_url = Url::parse(&base_url).unwrap();
|
||||
clean_url.set_fragment(None);
|
||||
// Prevent credentials from getting into metadata
|
||||
if is_http_url(&base_url) {
|
||||
// Only HTTP(S) URLs may feature credentials
|
||||
clean_url.set_username("").unwrap();
|
||||
clean_url.set_password(None).unwrap();
|
||||
let metadata_comment = metadata_tag(&base_url);
|
||||
result.insert_str(0, &metadata_comment);
|
||||
if metadata_comment.len() > 0 {
|
||||
result.insert_str(metadata_comment.len(), "\n");
|
||||
}
|
||||
let timestamp = time_saved.to_rfc3339_opts(SecondsFormat::Secs, true);
|
||||
let metadata_comment = if is_http_url(&base_url) {
|
||||
format!(
|
||||
"<!-- Saved from {} at {} using {} v{} -->\n",
|
||||
&clean_url,
|
||||
timestamp,
|
||||
env!("CARGO_PKG_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"<!-- Saved from local source at {} using {} v{} -->\n",
|
||||
timestamp,
|
||||
env!("CARGO_PKG_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
)
|
||||
};
|
||||
html.insert_str(0, &metadata_comment);
|
||||
}
|
||||
|
||||
// Write result into stdout or file
|
||||
output
|
||||
.writeln_str(&html)
|
||||
.writeln_str(&result)
|
||||
.expect("Could not write HTML output");
|
||||
}
|
||||
|
|
82
src/tests/html/metadata_tag.rs
Normal file
82
src/tests/html/metadata_tag.rs
Normal file
|
@ -0,0 +1,82 @@
|
|||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use chrono::prelude::*;

    use crate::html;

    /// Current UTC time, formatted the same way the expected tags below embed it.
    fn current_timestamp() -> String {
        Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true)
    }

    /// Calls `html::metadata_tag(url)` and compares the result with
    /// `expected(timestamp)`.
    ///
    /// The timestamp inside the tag has one-second resolution, so the clock
    /// may tick between the moment the test samples it and the moment
    /// `metadata_tag` does. To avoid spurious failures the timestamp is taken
    /// both before and after the call, and either one is accepted.
    fn assert_metadata_tag<F>(url: &str, expected: F)
    where
        F: Fn(&str) -> String,
    {
        let before = current_timestamp();
        let metadata_comment: String = html::metadata_tag(url);
        let after = current_timestamp();

        let expected_before = expected(&before);
        let expected_after = expected(&after);

        assert!(
            metadata_comment == expected_before || metadata_comment == expected_after,
            "unexpected metadata tag {:?}; expected {:?} or {:?}",
            metadata_comment,
            expected_before,
            expected_after,
        );
    }

    /// An HTTP URL is echoed back inside the tag.
    #[test]
    fn http_url() {
        let url = "http://192.168.1.1/";

        assert_metadata_tag(url, |timestamp| {
            format!(
                "<!-- Saved from {} at {} using {} v{} -->",
                url,
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        });
    }

    /// A `file:` URL is reported as a local source without revealing the path.
    #[test]
    fn file_url() {
        let url = "file:///home/monolith/index.html";

        assert_metadata_tag(url, |timestamp| {
            format!(
                "<!-- Saved from local source at {} using {} v{} -->",
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        });
    }

    /// A `data:` URL is reported as a local source as well.
    #[test]
    fn data_url() {
        let url = "data:text/html,Hello%2C%20World!";

        assert_metadata_tag(url, |timestamp| {
            format!(
                "<!-- Saved from local source at {} using {} v{} -->",
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        });
    }
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod failing {
    use crate::html;

    /// An empty target yields an empty string rather than a metadata comment.
    #[test]
    fn empty_string() {
        let metadata_comment: String = html::metadata_tag("");

        assert!(metadata_comment.is_empty());
    }
}
|
|
@ -2,5 +2,6 @@ mod embed_srcset;
|
|||
mod get_node_name;
|
||||
mod has_proper_integrity;
|
||||
mod is_icon;
|
||||
mod metadata_tag;
|
||||
mod stringify_document;
|
||||
mod walk_and_embed_assets;
|
||||
|
|
Loading…
Reference in a new issue