2019-08-23 11:49:14 +02:00
|
|
|
#[macro_use]
|
2019-08-23 05:17:15 +02:00
|
|
|
extern crate clap;
|
|
|
|
extern crate monolith;
|
|
|
|
|
2019-08-23 20:24:45 +02:00
|
|
|
use clap::{App, Arg};
|
2019-09-22 02:06:00 +02:00
|
|
|
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
2019-09-29 23:15:49 +02:00
|
|
|
use monolith::http::retrieve_asset;
|
|
|
|
use monolith::utils::is_valid_url;
|
2019-08-23 05:17:15 +02:00
|
|
|
|
2019-09-29 23:15:49 +02:00
|
|
|
/// Fallback `User-Agent` string, used when the `--user-agent` option is not
/// supplied on the command line.
const DEFAULT_USER_AGENT: &str =
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0";
|
2019-08-23 20:33:18 +02:00
|
|
|
|
2019-08-23 05:17:15 +02:00
|
|
|
fn main() {
|
|
|
|
let command = App::new("monolith")
|
2019-08-23 11:49:14 +02:00
|
|
|
.version(crate_version!())
|
2019-08-23 20:44:16 +02:00
|
|
|
.author(crate_authors!("\n"))
|
2019-08-23 11:49:14 +02:00
|
|
|
.about(crate_description!())
|
2019-08-23 20:24:45 +02:00
|
|
|
.arg(
|
|
|
|
Arg::with_name("url")
|
|
|
|
.required(true)
|
|
|
|
.takes_value(true)
|
|
|
|
.index(1)
|
|
|
|
.help("URL to download"),
|
|
|
|
)
|
2019-09-22 18:57:50 +02:00
|
|
|
// .args_from_usage("-a, --include-audio 'Embed audio sources'")
|
2019-09-22 02:06:00 +02:00
|
|
|
.args_from_usage("-c, --no-css 'Ignore styles'")
|
|
|
|
.args_from_usage("-f, --no-frames 'Exclude iframes'")
|
|
|
|
.args_from_usage("-i, --no-images 'Remove images'")
|
|
|
|
.args_from_usage("-I, --isolate 'Cut off from the Internet'")
|
|
|
|
.args_from_usage("-j, --no-js 'Exclude JavaScript'")
|
2019-08-27 05:17:36 +02:00
|
|
|
.args_from_usage("-k, --insecure 'Accept invalid X.509 (TLS) certificates'")
|
2019-08-25 17:41:30 +02:00
|
|
|
.args_from_usage("-s, --silent 'Suppress verbosity'")
|
2019-08-23 22:00:05 +02:00
|
|
|
.args_from_usage("-u, --user-agent=[Iceweasel] 'Custom User-Agent string'")
|
2019-09-22 18:57:50 +02:00
|
|
|
// .args_from_usage("-v, --include-video 'Embed video sources'")
|
2019-08-23 05:17:15 +02:00
|
|
|
.get_matches();
|
|
|
|
|
|
|
|
// Process the command
|
2019-09-22 02:06:00 +02:00
|
|
|
let arg_target: &str = command.value_of("url").unwrap();
|
|
|
|
let opt_no_css: bool = command.is_present("no-css");
|
|
|
|
let opt_no_frames: bool = command.is_present("no-frames");
|
|
|
|
let opt_no_images: bool = command.is_present("no-images");
|
|
|
|
let opt_no_js: bool = command.is_present("no-js");
|
|
|
|
let opt_insecure: bool = command.is_present("insecure");
|
|
|
|
let opt_isolate: bool = command.is_present("isolate");
|
|
|
|
let opt_silent: bool = command.is_present("silent");
|
|
|
|
let opt_user_agent: &str = command.value_of("user-agent").unwrap_or(DEFAULT_USER_AGENT);
|
2019-08-23 05:17:15 +02:00
|
|
|
|
2019-08-23 20:24:45 +02:00
|
|
|
if is_valid_url(arg_target) {
|
2019-10-01 05:58:09 +02:00
|
|
|
let (data, final_url) = retrieve_asset(
|
2019-09-22 02:06:00 +02:00
|
|
|
&arg_target,
|
|
|
|
false,
|
|
|
|
"",
|
|
|
|
opt_user_agent,
|
|
|
|
opt_silent,
|
|
|
|
opt_insecure,
|
|
|
|
)
|
|
|
|
.unwrap();
|
2019-08-24 05:06:06 +02:00
|
|
|
let dom = html_to_dom(&data);
|
2019-08-23 05:17:15 +02:00
|
|
|
|
2019-08-24 05:06:06 +02:00
|
|
|
walk_and_embed_assets(
|
2019-10-01 05:58:09 +02:00
|
|
|
&final_url,
|
2019-08-24 05:06:06 +02:00
|
|
|
&dom.document,
|
2019-09-22 02:06:00 +02:00
|
|
|
opt_no_css,
|
2019-08-24 05:06:06 +02:00
|
|
|
opt_no_js,
|
|
|
|
opt_no_images,
|
|
|
|
opt_user_agent,
|
2019-08-25 17:41:30 +02:00
|
|
|
opt_silent,
|
2019-08-27 05:17:36 +02:00
|
|
|
opt_insecure,
|
2019-09-22 02:06:00 +02:00
|
|
|
opt_no_frames,
|
|
|
|
);
|
|
|
|
|
|
|
|
let html: String = stringify_document(
|
|
|
|
&dom.document,
|
|
|
|
opt_no_css,
|
|
|
|
opt_no_frames,
|
|
|
|
opt_no_js,
|
|
|
|
opt_no_images,
|
|
|
|
opt_isolate,
|
2019-08-24 05:06:06 +02:00
|
|
|
);
|
2019-08-23 05:17:15 +02:00
|
|
|
|
2019-09-22 02:06:00 +02:00
|
|
|
println!("{}", html);
|
2019-08-23 05:17:15 +02:00
|
|
|
}
|
|
|
|
}
|