2019-08-23 11:49:14 +02:00
|
|
|
#[macro_use]
|
2019-08-23 05:17:15 +02:00
|
|
|
extern crate clap;
|
|
|
|
|
2019-10-10 07:28:12 +02:00
|
|
|
mod args;
|
2019-12-26 06:41:03 +01:00
|
|
|
mod macros;
|
2019-10-10 07:28:12 +02:00
|
|
|
|
2020-01-02 16:31:55 +01:00
|
|
|
use crate::args::AppArgs;
|
2019-09-22 02:06:00 +02:00
|
|
|
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
2019-09-29 23:15:49 +02:00
|
|
|
use monolith::http::retrieve_asset;
|
|
|
|
use monolith::utils::is_valid_url;
|
2020-01-07 05:22:28 +01:00
|
|
|
use reqwest::blocking::Client;
|
2019-12-10 03:13:25 +01:00
|
|
|
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
2020-01-20 09:02:08 +01:00
|
|
|
use reqwest::Proxy;
|
2019-10-23 00:33:22 +02:00
|
|
|
use std::collections::HashMap;
|
2020-01-20 09:02:08 +01:00
|
|
|
use std::env;
|
2020-01-15 04:26:04 +01:00
|
|
|
use std::fs::File;
|
|
|
|
use std::io::{self, Error, Write};
|
|
|
|
use std::process;
|
2019-12-10 03:13:25 +01:00
|
|
|
use std::time::Duration;
|
2019-08-23 05:17:15 +02:00
|
|
|
|
2020-01-15 04:26:04 +01:00
|
|
|
/// Destination that the generated HTML is written to.
///
/// Each variant owns the handle it writes through.
enum Output {
    /// Write to the process's standard output stream.
    Stdout(io::Stdout),
    /// Write to a file on disk.
    File(File),
}
|
2019-12-26 06:41:03 +01:00
|
|
|
|
2020-01-15 04:26:04 +01:00
|
|
|
impl Output {
|
|
|
|
fn new(file_path: &str) -> Result<Output, Error> {
|
|
|
|
if file_path.is_empty() {
|
|
|
|
Ok(Output::Stdout(io::stdout()))
|
|
|
|
} else {
|
|
|
|
Ok(Output::File(File::create(file_path)?))
|
|
|
|
}
|
2019-12-26 06:41:03 +01:00
|
|
|
}
|
|
|
|
|
2020-01-15 04:26:04 +01:00
|
|
|
fn writeln_str(&mut self, s: &str) -> Result<(), Error> {
|
|
|
|
match self {
|
|
|
|
Output::Stdout(stdout) => {
|
|
|
|
writeln!(stdout, "{}", s)?;
|
|
|
|
stdout.flush()
|
|
|
|
}
|
|
|
|
Output::File(f) => {
|
|
|
|
writeln!(f, "{}", s)?;
|
|
|
|
f.flush()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-12-26 06:41:03 +01:00
|
|
|
}
|
|
|
|
|
2020-01-20 09:02:08 +01:00
|
|
|
fn create_http_client(args: &AppArgs) -> Result<Client, reqwest::Error> {
|
|
|
|
let mut header_map = HeaderMap::new();
|
|
|
|
header_map.insert(
|
|
|
|
USER_AGENT,
|
|
|
|
HeaderValue::from_str(&args.user_agent).expect("Invalid User-Agent header specified"),
|
|
|
|
);
|
|
|
|
|
|
|
|
let mut builder = Client::builder()
|
|
|
|
.timeout(Duration::from_secs(10))
|
|
|
|
.danger_accept_invalid_certs(args.insecure)
|
|
|
|
.default_headers(header_map);
|
|
|
|
|
|
|
|
if let Ok(var) = env::var("https_proxy").or_else(|_| env::var("HTTPS_PROXY")) {
|
2020-01-20 09:17:24 +01:00
|
|
|
if !var.is_empty() {
|
|
|
|
let proxy = Proxy::https(&var)
|
|
|
|
.expect("Could not set HTTPS proxy. Please check $https_proxy env var");
|
|
|
|
builder = builder.proxy(proxy);
|
|
|
|
}
|
2020-01-20 09:02:08 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if let Ok(var) = env::var("http_proxy").or_else(|_| env::var("HTTP_PROXY")) {
|
2020-01-20 09:17:24 +01:00
|
|
|
if !var.is_empty() {
|
|
|
|
let proxy = Proxy::http(&var)
|
|
|
|
.expect("Could not set HTTP proxy. Please check $http_proxy env var");
|
|
|
|
builder = builder.proxy(proxy);
|
|
|
|
}
|
2020-01-20 09:02:08 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
builder.build()
|
|
|
|
}
|
|
|
|
|
2019-08-23 05:17:15 +02:00
|
|
|
fn main() {
|
2019-10-10 07:28:12 +02:00
|
|
|
let app_args = AppArgs::get();
|
2019-12-26 06:41:03 +01:00
|
|
|
|
2020-01-15 04:26:04 +01:00
|
|
|
if !is_valid_url(app_args.url_target.as_str()) {
|
|
|
|
eprintln!(
|
|
|
|
"Only HTTP and HTTPS URLs are allowed but got: {}",
|
|
|
|
&app_args.url_target
|
|
|
|
);
|
|
|
|
process::exit(1);
|
2019-12-26 06:41:03 +01:00
|
|
|
}
|
|
|
|
|
2020-01-15 04:26:04 +01:00
|
|
|
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
|
|
|
|
let mut cache = HashMap::new();
|
2020-01-20 09:02:08 +01:00
|
|
|
let client = create_http_client(&app_args).expect("Failed to initialize HTTP client");
|
2019-09-22 02:06:00 +02:00
|
|
|
|
2020-01-15 04:26:04 +01:00
|
|
|
// Retrieve root document
|
|
|
|
let (data, final_url) = retrieve_asset(
|
|
|
|
&mut cache,
|
|
|
|
&client,
|
|
|
|
app_args.url_target.as_str(),
|
|
|
|
false,
|
|
|
|
"",
|
|
|
|
app_args.silent,
|
|
|
|
)
|
|
|
|
.expect("Could not retrieve assets in HTML");
|
|
|
|
let dom = html_to_dom(&data);
|
2019-08-23 05:17:15 +02:00
|
|
|
|
2020-01-15 04:26:04 +01:00
|
|
|
walk_and_embed_assets(
|
|
|
|
&mut cache,
|
|
|
|
&client,
|
|
|
|
&final_url,
|
|
|
|
&dom.document,
|
|
|
|
app_args.no_css,
|
|
|
|
app_args.no_js,
|
|
|
|
app_args.no_images,
|
|
|
|
app_args.silent,
|
|
|
|
app_args.no_frames,
|
|
|
|
);
|
|
|
|
|
|
|
|
let html: String = stringify_document(
|
|
|
|
&dom.document,
|
|
|
|
app_args.no_css,
|
|
|
|
app_args.no_frames,
|
|
|
|
app_args.no_js,
|
|
|
|
app_args.no_images,
|
|
|
|
app_args.isolate,
|
|
|
|
);
|
|
|
|
|
|
|
|
output
|
|
|
|
.writeln_str(&html)
|
|
|
|
.expect("Could not write HTML output");
|
2019-08-23 05:17:15 +02:00
|
|
|
}
|