From 0347fd398579c69580df56dd283b671e42f57224 Mon Sep 17 00:00:00 2001
From: Sunshine
Date: Thu, 25 Jun 2020 18:23:56 -0400
Subject: [PATCH] move metadata tag code into a function

---
 src/html.rs                    | 38 ++++++++++++++++
 src/main.rs                    | 49 ++++++--------
 src/tests/html/metadata_tag.rs | 82 ++++++++++++++++++++++++++++++++++
 src/tests/html/mod.rs          |  1 +
 4 files changed, 135 insertions(+), 35 deletions(-)
 create mode 100644 src/tests/html/metadata_tag.rs

diff --git a/src/html.rs b/src/html.rs
index e2dead3..f16f4bc 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -1,4 +1,5 @@
 use base64;
+use chrono::prelude::*;
 use html5ever::interface::QualName;
 use html5ever::parse_document;
 use html5ever::rcdom::{Handle, NodeData, RcDom};
@@ -7,6 +8,7 @@ use html5ever::tendril::{format_tendril, Tendril, TendrilSink};
 use html5ever::tree_builder::{Attribute, TreeSink};
 use html5ever::{local_name, namespace_url, ns};
 use reqwest::blocking::Client;
+use reqwest::Url;
 use sha2::{Digest, Sha256, Sha384, Sha512};
 use std::collections::HashMap;
 use std::default::Default;
@@ -1119,3 +1121,39 @@ pub fn stringify_document(
 
     result
 }
+
+pub fn metadata_tag(url: &str) -> String {
+    let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
+
+    // Safe to unwrap (we just put this through an HTTP request)
+    match Url::parse(url) {
+        Ok(mut clean_url) => {
+            clean_url.set_fragment(None);
+
+            // Prevent credentials from getting into metadata
+            if is_http_url(url) {
+                // Only HTTP(S) URLs may feature credentials
+                clean_url.set_username("").unwrap();
+                clean_url.set_password(None).unwrap();
+            }
+
+            if is_http_url(url) {
+                format!(
+                    "<!-- Saved from {} at {} using {} v{} -->",
+                    &clean_url,
+                    timestamp,
+                    env!("CARGO_PKG_NAME"),
+                    env!("CARGO_PKG_VERSION"),
+                )
+            } else {
+                format!(
+                    "<!-- Saved at {} using {} v{} -->",
+                    timestamp,
+                    env!("CARGO_PKG_NAME"),
+                    env!("CARGO_PKG_VERSION"),
+                )
+            }
+        }
+        Err(_) => str!(),
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 0a72c14..131120e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,5 @@
-use chrono::prelude::*;
 use reqwest::blocking::Client;
 use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
-use reqwest::Url;
 use std::collections::HashMap;
 use std::env;
 use std::fs;
@@ -10,7 +8,7 @@ use std::path::Path;
 use std::process;
 use std::time::Duration;
 
-use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
+use monolith::html::{html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets};
 use monolith::url::{data_url_to_data, is_data_url, is_file_url, is_http_url};
 use monolith::utils::retrieve_asset;
 
@@ -63,6 +61,7 @@ fn main() {
     let mut target: String = str!(original_target.clone()).replace("\\", "/");
     let path_is_relative: bool = path.is_relative();
 
+    // Determine exact target URL
     if target.clone().len() == 0 {
         eprintln!("No target specified");
         process::exit(1);
@@ -89,6 +88,7 @@ fn main() {
         target_url = target.as_str();
     }
 
+    // Define output
     let mut output = Output::new(&app_args.output).expect("Could not prepare output");
 
     // Initialize client
@@ -98,7 +98,6 @@ fn main() {
         USER_AGENT,
         HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
     );
-
     let timeout: u64 = if app_args.timeout > 0 {
         app_args.timeout
     } else {
@@ -111,7 +110,7 @@ fn main() {
         .build()
         .expect("Failed to initialize HTTP client");
 
-    // Retrieve root document
+    // Retrieve target document
     if is_file_url(target_url) || is_http_url(target_url) {
         match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) {
             Ok((data, final_url, _media_type)) => {
@@ -135,8 +134,7 @@ fn main() {
         process::exit(1);
     }
 
-    let time_saved = Utc::now();
-
+    // Embed remote assets
     walk_and_embed_assets(
         &mut cache,
         &client,
@@ -150,7 +148,8 @@ fn main() {
         app_args.silent,
     );
 
-    let mut html: String = stringify_document(
+    // Serialize DOM tree
+    let mut result: String = stringify_document(
         &dom.document,
         app_args.no_css,
         app_args.no_frames,
@@ -159,37 +158,17 @@ fn main() {
         app_args.isolate,
     );
 
+    // Add metadata tag
     if !app_args.no_metadata {
-        // Safe to unwrap (we just put this through an HTTP request)
-        let mut clean_url = Url::parse(&base_url).unwrap();
-        clean_url.set_fragment(None);
-        // Prevent credentials from getting into metadata
-        if is_http_url(&base_url) {
-            // Only HTTP(S) URLs may feature credentials
-            clean_url.set_username("").unwrap();
-            clean_url.set_password(None).unwrap();
+        let metadata_comment = metadata_tag(&base_url);
+        result.insert_str(0, &metadata_comment);
+        if metadata_comment.len() > 0 {
+            result.insert_str(metadata_comment.len(), "\n");
         }
-        let timestamp = time_saved.to_rfc3339_opts(SecondsFormat::Secs, true);
-        let metadata_comment = if is_http_url(&base_url) {
-            format!(
-                "<!-- Saved from {} at {} using {} v{} -->\n",
-                &clean_url,
-                timestamp,
-                env!("CARGO_PKG_NAME"),
-                env!("CARGO_PKG_VERSION"),
-            )
-        } else {
-            format!(
-                "<!-- Saved at {} using {} v{} -->\n",
-                timestamp,
-                env!("CARGO_PKG_NAME"),
-                env!("CARGO_PKG_VERSION"),
-            )
-        };
-        html.insert_str(0, &metadata_comment);
     }
 
+    // Write result into stdout or file
     output
-        .writeln_str(&html)
+        .writeln_str(&result)
         .expect("Could not write HTML output");
 }
diff --git a/src/tests/html/metadata_tag.rs b/src/tests/html/metadata_tag.rs
new file mode 100644
index 0000000..cef13bf
--- /dev/null
+++ b/src/tests/html/metadata_tag.rs
@@ -0,0 +1,82 @@
+//  ██████╗  █████╗ ███████╗███████╗██╗███╗   ██╗ ██████╗
+// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗  ██║██╔════╝
+// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║  ███╗
+// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║   ██║
+// ██║     ██║  ██║███████║███████║██║██║ ╚████║╚██████╔╝
+// ╚═╝     ╚═╝  ╚═╝╚══════╝╚══════╝╚═╝╚═╝  ╚═══╝ ╚═════╝
+
+#[cfg(test)]
+mod passing {
+    use chrono::prelude::*;
+
+    use crate::html;
+
+    #[test]
+    fn http_url() {
+        let url = "http://192.168.1.1/";
+        let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
+        let metadata_comment: String = html::metadata_tag(url);
+
+        assert_eq!(
+            metadata_comment,
+            format!(
+                "<!-- Saved from {} at {} using {} v{} -->",
+                &url,
+                timestamp,
+                env!("CARGO_PKG_NAME"),
+                env!("CARGO_PKG_VERSION"),
+            )
+        );
+    }
+
+    #[test]
+    fn file_url() {
+        let url = "file:///home/monolith/index.html";
+        let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
+        let metadata_comment: String = html::metadata_tag(url);
+
+        assert_eq!(
+            metadata_comment,
+            format!(
+                "<!-- Saved at {} using {} v{} -->",
+                timestamp,
+                env!("CARGO_PKG_NAME"),
+                env!("CARGO_PKG_VERSION"),
+            )
+        );
+    }
+
+    #[test]
+    fn data_url() {
+        let url = "data:text/html,Hello%2C%20World!";
+        let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
+        let metadata_comment: String = html::metadata_tag(url);
+
+        assert_eq!(
+            metadata_comment,
+            format!(
+                "<!-- Saved at {} using {} v{} -->",
+                timestamp,
+                env!("CARGO_PKG_NAME"),
+                env!("CARGO_PKG_VERSION"),
+            )
+        );
+    }
+}
+
+// ███████╗ █████╗ ██╗██╗     ██╗███╗   ██╗ ██████╗
+// ██╔════╝██╔══██╗██║██║     ██║████╗  ██║██╔════╝
+// █████╗  ███████║██║██║     ██║██╔██╗ ██║██║  ███╗
+// ██╔══╝  ██╔══██║██║██║     ██║██║╚██╗██║██║   ██║
+// ██║     ██║  ██║██║███████╗██║██║ ╚████║╚██████╔╝
+// ╚═╝     ╚═╝  ╚═╝╚═╝╚══════╝╚═╝╚═╝  ╚═══╝ ╚═════╝
+
+#[cfg(test)]
+mod failing {
+    use crate::html;
+
+    #[test]
+    fn empty_string() {
+        assert_eq!(html::metadata_tag(""), "");
+    }
+}
diff --git a/src/tests/html/mod.rs b/src/tests/html/mod.rs
index f44ce06..cbe8321 100644
--- a/src/tests/html/mod.rs
+++ b/src/tests/html/mod.rs
@@ -2,5 +2,6 @@ mod embed_srcset;
 mod get_node_name;
 mod has_proper_integrity;
 mod is_icon;
+mod metadata_tag;
 mod stringify_document;
 mod walk_and_embed_assets;
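
Usage note (not part of the patch): a minimal sketch of how a caller such as main.rs is expected to use the extracted helper, assuming the monolith::html::metadata_tag path imported in the main.rs hunk above. The standalone main, the sample URL, and the placeholder document string are hypothetical.

use monolith::html::metadata_tag;

fn main() {
    // Placeholder for the serialized document produced by stringify_document()
    let mut result = String::from("<html><head></head><body></body></html>");

    // For HTTP(S) URLs the helper strips credentials and the fragment, then returns
    // a comment of the form:
    //   <!-- Saved from <clean URL> at <timestamp> using <pkg name> v<pkg version> -->
    // Non-HTTP URLs omit the URL part; unparseable input yields an empty string.
    let metadata_comment = metadata_tag("http://user:pass@192.168.1.1/#top");

    // Same prepend logic as the patched main.rs: comment first, then a newline
    result.insert_str(0, &metadata_comment);
    if metadata_comment.len() > 0 {
        result.insert_str(metadata_comment.len(), "\n");
    }

    println!("{}", result);
}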