diff --git a/src/html.rs b/src/html.rs
index e2dead3..f16f4bc 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -1,4 +1,5 @@
use base64;
+use chrono::prelude::*;
use html5ever::interface::QualName;
use html5ever::parse_document;
use html5ever::rcdom::{Handle, NodeData, RcDom};
@@ -7,6 +8,7 @@ use html5ever::tendril::{format_tendril, Tendril, TendrilSink};
use html5ever::tree_builder::{Attribute, TreeSink};
use html5ever::{local_name, namespace_url, ns};
use reqwest::blocking::Client;
+use reqwest::Url;
use sha2::{Digest, Sha256, Sha384, Sha512};
use std::collections::HashMap;
use std::default::Default;
@@ -1119,3 +1121,39 @@ pub fn stringify_document(
result
}
+
+/// Builds the HTML comment that records the source, time, and tool version of a saved page.
+///
+/// Returns an empty string when `url` cannot be parsed as a URL.
+pub fn metadata_tag(url: &str) -> String {
+    let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
+
+    // `url` may be arbitrary input: a parse failure yields an empty tag rather than a panic
+    match Url::parse(url) {
+        Ok(mut clean_url) => {
+            if is_http_url(url) {
+                clean_url.set_fragment(None);
+                // Prevent credentials from getting into metadata
+                // (only HTTP(S) URLs may feature credentials)
+                clean_url.set_username("").unwrap();
+                clean_url.set_password(None).unwrap();
+                format!(
+                    "<!-- Saved from {} at {} using {} v{} -->",
+                    &clean_url,
+                    timestamp,
+                    env!("CARGO_PKG_NAME"),
+                    env!("CARGO_PKG_VERSION"),
+                )
+            } else {
+                // Any other scheme is reported as a local source, without its URL
+                format!(
+                    "<!-- Saved from local source at {} using {} v{} -->",
+                    timestamp,
+                    env!("CARGO_PKG_NAME"),
+                    env!("CARGO_PKG_VERSION"),
+                )
+            }
+        }
+        Err(_) => str!(),
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 0a72c14..131120e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,5 @@
-use chrono::prelude::*;
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
-use reqwest::Url;
use std::collections::HashMap;
use std::env;
use std::fs;
@@ -10,7 +8,7 @@ use std::path::Path;
use std::process;
use std::time::Duration;
-use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
+use monolith::html::{html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets};
use monolith::url::{data_url_to_data, is_data_url, is_file_url, is_http_url};
use monolith::utils::retrieve_asset;
@@ -63,6 +61,7 @@ fn main() {
let mut target: String = str!(original_target.clone()).replace("\\", "/");
let path_is_relative: bool = path.is_relative();
+ // Determine exact target URL
if target.clone().len() == 0 {
eprintln!("No target specified");
process::exit(1);
@@ -89,6 +88,7 @@ fn main() {
target_url = target.as_str();
}
+ // Define output
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
// Initialize client
@@ -98,7 +98,6 @@ fn main() {
USER_AGENT,
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
);
-
let timeout: u64 = if app_args.timeout > 0 {
app_args.timeout
} else {
@@ -111,7 +110,7 @@ fn main() {
.build()
.expect("Failed to initialize HTTP client");
- // Retrieve root document
+ // Retrieve target document
if is_file_url(target_url) || is_http_url(target_url) {
match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) {
Ok((data, final_url, _media_type)) => {
@@ -135,8 +134,7 @@ fn main() {
process::exit(1);
}
- let time_saved = Utc::now();
-
+ // Embed remote assets
walk_and_embed_assets(
&mut cache,
&client,
@@ -150,7 +148,8 @@ fn main() {
app_args.silent,
);
- let mut html: String = stringify_document(
+ // Serialize DOM tree
+ let mut result: String = stringify_document(
&dom.document,
app_args.no_css,
app_args.no_frames,
@@ -159,37 +158,17 @@ fn main() {
app_args.isolate,
);
+ // Add metadata tag
if !app_args.no_metadata {
- // Safe to unwrap (we just put this through an HTTP request)
- let mut clean_url = Url::parse(&base_url).unwrap();
- clean_url.set_fragment(None);
- // Prevent credentials from getting into metadata
- if is_http_url(&base_url) {
- // Only HTTP(S) URLs may feature credentials
- clean_url.set_username("").unwrap();
- clean_url.set_password(None).unwrap();
+ let metadata_comment = metadata_tag(&base_url);
+ result.insert_str(0, &metadata_comment);
+ if metadata_comment.len() > 0 {
+ result.insert_str(metadata_comment.len(), "\n");
}
- let timestamp = time_saved.to_rfc3339_opts(SecondsFormat::Secs, true);
- let metadata_comment = if is_http_url(&base_url) {
- format!(
- "\n",
- &clean_url,
- timestamp,
- env!("CARGO_PKG_NAME"),
- env!("CARGO_PKG_VERSION"),
- )
- } else {
- format!(
- "\n",
- timestamp,
- env!("CARGO_PKG_NAME"),
- env!("CARGO_PKG_VERSION"),
- )
- };
- html.insert_str(0, &metadata_comment);
}
+ // Write result into stdout or file
output
- .writeln_str(&html)
+ .writeln_str(&result)
.expect("Could not write HTML output");
}
diff --git a/src/tests/html/metadata_tag.rs b/src/tests/html/metadata_tag.rs
new file mode 100644
index 0000000..cef13bf
--- /dev/null
+++ b/src/tests/html/metadata_tag.rs
@@ -0,0 +1,82 @@
+// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
+// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
+// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
+// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
+// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
+// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
+
+#[cfg(test)]
+mod passing {
+    use chrono::prelude::*;
+
+    use crate::html;
+
+    // Current UTC time in RFC 3339 form with second precision
+    fn now() -> String {
+        Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true)
+    }
+
+    // Checks the tag produced for `url` against the expected comment naming `source`.
+    // The tag embeds a timestamp with one-second resolution, so the clock is sampled
+    // before and after the call and either value is accepted; comparing against a single
+    // sample would fail whenever the second rolls over in between.
+    fn assert_tag(url: &str, source: &str) {
+        let before = now();
+        let metadata_comment: String = html::metadata_tag(url);
+        let after = now();
+
+        let expected = |timestamp: &str| {
+            format!(
+                "<!-- Saved from {} at {} using {} v{} -->",
+                source,
+                timestamp,
+                env!("CARGO_PKG_NAME"),
+                env!("CARGO_PKG_VERSION"),
+            )
+        };
+
+        assert!(
+            metadata_comment == expected(&before) || metadata_comment == expected(&after),
+            "unexpected metadata tag: {}",
+            metadata_comment
+        );
+    }
+
+    // HTTP(S) documents are reported with their URL
+    #[test]
+    fn http_url() {
+        let url = "http://192.168.1.1/";
+        assert_tag(url, url);
+    }
+
+    // Non-HTTP documents are reported as "local source" instead of their URL
+    #[test]
+    fn file_url() {
+        let url = "file:///home/monolith/index.html";
+        assert_tag(url, "local source");
+    }
+
+    // Data URLs are expected to be reported the same way as files
+    #[test]
+    fn data_url() {
+        let url = "data:text/html,Hello%2C%20World!";
+        assert_tag(url, "local source");
+    }
+}
+
+// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
+// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
+// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
+// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
+// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
+// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
+
+#[cfg(test)]
+mod failing {
+    use crate::html;
+
+    #[test]
+    fn empty_string() {
+        assert!(html::metadata_tag("").is_empty()); // unparsable URL yields no tag
+    }
+}
diff --git a/src/tests/html/mod.rs b/src/tests/html/mod.rs
index f44ce06..cbe8321 100644
--- a/src/tests/html/mod.rs
+++ b/src/tests/html/mod.rs
@@ -2,5 +2,6 @@ mod embed_srcset;
mod get_node_name;
mod has_proper_integrity;
mod is_icon;
+mod metadata_tag;
mod stringify_document;
mod walk_and_embed_assets;