2019-08-23 11:49:14 +02:00
|
|
|
#[macro_use]
|
2019-08-23 05:17:15 +02:00
|
|
|
extern crate clap;
|
|
|
|
|
2019-10-10 07:28:12 +02:00
|
|
|
mod args;
|
2019-12-26 06:41:03 +01:00
|
|
|
mod macros;
|
2019-10-10 07:28:12 +02:00
|
|
|
|
2020-01-02 16:31:55 +01:00
|
|
|
use crate::args::AppArgs;
|
2019-09-22 02:06:00 +02:00
|
|
|
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
2019-09-29 23:15:49 +02:00
|
|
|
use monolith::http::retrieve_asset;
|
|
|
|
use monolith::utils::is_valid_url;
|
2020-01-07 05:22:28 +01:00
|
|
|
use reqwest::blocking::Client;
|
2019-12-10 03:13:25 +01:00
|
|
|
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
2019-10-23 00:33:22 +02:00
|
|
|
use std::collections::HashMap;
|
2019-12-26 06:41:03 +01:00
|
|
|
use std::fs::{remove_file, File};
|
|
|
|
use std::io::{Error, Write};
|
2019-12-10 03:13:25 +01:00
|
|
|
use std::time::Duration;
|
2019-08-23 05:17:15 +02:00
|
|
|
|
2019-12-26 06:41:03 +01:00
|
|
|
/// Creates (or truncates) the file at `file_path` and writes `content` to it.
///
/// * Non-empty `content` is written followed by a single trailing newline, and the file is
///   synced to disk before returning.
/// * Empty `content` turns the call into a writability probe: the file is created and then
///   removed again, so no empty file is left behind.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be created, written, synced, or
/// (for empty content) removed.
fn create_file(file_path: &String, content: String) -> Result<(), Error> {
    let mut file = File::create(file_path)?;

    if content.is_empty() {
        // Close the handle before unlinking so removal never races with our own open
        // descriptor on platforms that are strict about deleting open files.
        drop(file);
        return remove_file(file_path);
    }

    file.write_all(content.as_bytes())?;
    file.write_all(b"\n")?;
    file.sync_all()
}
|
|
|
|
|
2019-08-23 05:17:15 +02:00
|
|
|
fn main() {
|
2019-10-10 07:28:12 +02:00
|
|
|
let app_args = AppArgs::get();
|
2019-10-23 00:33:22 +02:00
|
|
|
let cache = &mut HashMap::new();
|
2019-12-26 06:41:03 +01:00
|
|
|
|
|
|
|
// Attempt to create output file
|
|
|
|
if app_args.output != str!() {
|
|
|
|
create_file(&app_args.output, str!()).unwrap();
|
|
|
|
}
|
|
|
|
|
2019-10-10 07:28:12 +02:00
|
|
|
if is_valid_url(app_args.url_target.as_str()) {
|
2019-12-10 03:13:25 +01:00
|
|
|
// Initialize client
|
|
|
|
let mut header_map = HeaderMap::new();
|
|
|
|
match HeaderValue::from_str(&app_args.user_agent) {
|
|
|
|
Ok(header) => header_map.insert(USER_AGENT, header),
|
|
|
|
Err(err) => {
|
|
|
|
eprintln!("Invalid user agent! {}", err);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
};
|
2020-01-07 05:22:28 +01:00
|
|
|
let client = Client::builder()
|
2019-12-10 03:13:25 +01:00
|
|
|
.timeout(Duration::from_secs(10))
|
|
|
|
.danger_accept_invalid_certs(app_args.insecure)
|
|
|
|
.default_headers(header_map)
|
|
|
|
.build()
|
|
|
|
.expect("Failed to initialize HTTP client");
|
|
|
|
|
2019-12-26 06:41:03 +01:00
|
|
|
// Retrieve root document
|
2019-10-01 05:58:09 +02:00
|
|
|
let (data, final_url) = retrieve_asset(
|
2019-10-23 00:33:22 +02:00
|
|
|
cache,
|
2019-12-10 03:13:25 +01:00
|
|
|
&client,
|
2019-10-10 07:28:12 +02:00
|
|
|
app_args.url_target.as_str(),
|
2019-09-22 02:06:00 +02:00
|
|
|
false,
|
|
|
|
"",
|
2019-10-10 07:28:12 +02:00
|
|
|
app_args.silent,
|
2019-09-22 02:06:00 +02:00
|
|
|
)
|
|
|
|
.unwrap();
|
2020-01-09 00:51:18 +01:00
|
|
|
let downloaded_time = time::now();
|
2019-08-24 05:06:06 +02:00
|
|
|
let dom = html_to_dom(&data);
|
2019-08-23 05:17:15 +02:00
|
|
|
|
2019-08-24 05:06:06 +02:00
|
|
|
walk_and_embed_assets(
|
2019-10-23 00:33:22 +02:00
|
|
|
cache,
|
2019-12-10 03:13:25 +01:00
|
|
|
&client,
|
2019-10-01 05:58:09 +02:00
|
|
|
&final_url,
|
2019-08-24 05:06:06 +02:00
|
|
|
&dom.document,
|
2019-10-10 07:28:12 +02:00
|
|
|
app_args.no_css,
|
|
|
|
app_args.no_js,
|
|
|
|
app_args.no_images,
|
|
|
|
app_args.silent,
|
|
|
|
app_args.no_frames,
|
2019-09-22 02:06:00 +02:00
|
|
|
);
|
|
|
|
|
2020-01-09 00:51:18 +01:00
|
|
|
let mut html: String = stringify_document(
|
2019-09-22 02:06:00 +02:00
|
|
|
&dom.document,
|
2019-10-10 07:28:12 +02:00
|
|
|
app_args.no_css,
|
|
|
|
app_args.no_frames,
|
|
|
|
app_args.no_js,
|
|
|
|
app_args.no_images,
|
|
|
|
app_args.isolate,
|
2019-08-24 05:06:06 +02:00
|
|
|
);
|
2019-08-23 05:17:15 +02:00
|
|
|
|
2020-01-09 01:00:53 +01:00
|
|
|
if !app_args.no_context {
|
|
|
|
html.insert_str(
|
|
|
|
0,
|
|
|
|
&format!(
|
|
|
|
"<!--- Downloaded from {} on {}using {} v{} -->\n",
|
|
|
|
&final_url,
|
|
|
|
downloaded_time.rfc822(),
|
|
|
|
env!("CARGO_PKG_NAME"),
|
|
|
|
env!("CARGO_PKG_VERSION"),
|
|
|
|
),
|
|
|
|
);
|
|
|
|
}
|
2020-01-09 00:51:18 +01:00
|
|
|
|
2019-12-26 06:41:03 +01:00
|
|
|
if app_args.output == str!() {
|
|
|
|
println!("{}", html);
|
|
|
|
} else {
|
|
|
|
create_file(&app_args.output, html).unwrap();
|
|
|
|
}
|
2019-08-23 05:17:15 +02:00
|
|
|
}
|
|
|
|
}
|