Use a shared client initialized in main.rs
This commit is contained in:
parent
35f5e1353d
commit
65d0eab793
3 changed files with 36 additions and 35 deletions
34
src/html.rs
34
src/html.rs
|
@ -7,6 +7,7 @@ use html5ever::tree_builder::{Attribute, TreeSink};
|
|||
use html5ever::{local_name, namespace_url, ns};
|
||||
use http::retrieve_asset;
|
||||
use js::attr_is_event_handler;
|
||||
use reqwest::Client;
|
||||
use std::collections::HashMap;
|
||||
use std::default::Default;
|
||||
use utils::{data_to_dataurl, is_valid_url, resolve_url, url_has_protocol};
|
||||
|
@ -45,14 +46,13 @@ pub fn is_icon(attr_value: &str) -> bool {
|
|||
|
||||
pub fn walk_and_embed_assets(
|
||||
cache: &mut HashMap<String, String>,
|
||||
client: &Client,
|
||||
url: &str,
|
||||
node: &Handle,
|
||||
opt_no_css: bool,
|
||||
opt_no_js: bool,
|
||||
opt_no_images: bool,
|
||||
opt_user_agent: &str,
|
||||
opt_silent: bool,
|
||||
opt_insecure: bool,
|
||||
opt_no_frames: bool,
|
||||
) {
|
||||
match node.data {
|
||||
|
@ -61,14 +61,13 @@ pub fn walk_and_embed_assets(
|
|||
for child in node.children.borrow().iter() {
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
child,
|
||||
opt_no_css,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
opt_no_frames,
|
||||
);
|
||||
}
|
||||
|
@ -107,12 +106,11 @@ pub fn walk_and_embed_assets(
|
|||
.unwrap_or(EMPTY_STRING.clone());
|
||||
let (favicon_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&href_full_url,
|
||||
true,
|
||||
"",
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
)
|
||||
.unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
|
||||
attr.value.clear();
|
||||
|
@ -131,12 +129,11 @@ pub fn walk_and_embed_assets(
|
|||
.unwrap_or(EMPTY_STRING.clone());
|
||||
let (css_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&href_full_url,
|
||||
true,
|
||||
"text/css",
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
)
|
||||
.unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
|
||||
attr.value.clear();
|
||||
|
@ -174,12 +171,11 @@ pub fn walk_and_embed_assets(
|
|||
resolve_url(&url, &value).unwrap_or(EMPTY_STRING.clone());
|
||||
let (img_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&src_full_url,
|
||||
true,
|
||||
"",
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
)
|
||||
.unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
|
||||
attr.value.clear();
|
||||
|
@ -208,12 +204,11 @@ pub fn walk_and_embed_assets(
|
|||
.unwrap_or(EMPTY_STRING.clone());
|
||||
let (source_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&srcset_full_url,
|
||||
true,
|
||||
"",
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
)
|
||||
.unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
|
||||
attr.value.clear();
|
||||
|
@ -255,12 +250,11 @@ pub fn walk_and_embed_assets(
|
|||
.unwrap_or(EMPTY_STRING.clone());
|
||||
let (js_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&src_full_url,
|
||||
true,
|
||||
"application/javascript",
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
)
|
||||
.unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
|
||||
attr.value.clear();
|
||||
|
@ -309,25 +303,23 @@ pub fn walk_and_embed_assets(
|
|||
resolve_url(&url, &iframe_src).unwrap_or(EMPTY_STRING.clone());
|
||||
let (iframe_data, iframe_final_url) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&src_full_url,
|
||||
false,
|
||||
"text/html",
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
)
|
||||
.unwrap_or((EMPTY_STRING.clone(), src_full_url));
|
||||
let dom = html_to_dom(&iframe_data);
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
client,
|
||||
&iframe_final_url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
opt_no_frames,
|
||||
);
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
|
@ -355,12 +347,11 @@ pub fn walk_and_embed_assets(
|
|||
.unwrap_or(EMPTY_STRING.clone());
|
||||
let (poster_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&poster_full_url,
|
||||
true,
|
||||
"",
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
)
|
||||
.unwrap_or((poster_full_url, EMPTY_STRING.clone()));
|
||||
attr.value.clear();
|
||||
|
@ -404,14 +395,13 @@ pub fn walk_and_embed_assets(
|
|||
for child in node.children.borrow().iter() {
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
child,
|
||||
opt_no_css,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
opt_no_frames,
|
||||
);
|
||||
}
|
||||
|
|
12
src/http.rs
12
src/http.rs
|
@ -1,17 +1,15 @@
|
|||
use reqwest::header::{CONTENT_TYPE, USER_AGENT};
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use reqwest::Client;
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
use utils::{data_to_dataurl, is_data_url};
|
||||
|
||||
pub fn retrieve_asset(
|
||||
cache: &mut HashMap<String, String>,
|
||||
client: &Client,
|
||||
url: &str,
|
||||
as_dataurl: bool,
|
||||
mime: &str,
|
||||
opt_user_agent: &str,
|
||||
opt_silent: bool,
|
||||
opt_insecure: bool,
|
||||
) -> Result<(String, String), reqwest::Error> {
|
||||
if is_data_url(&url).unwrap() {
|
||||
Ok((url.to_string(), url.to_string()))
|
||||
|
@ -25,11 +23,7 @@ pub fn retrieve_asset(
|
|||
Ok((data.to_string(), url.to_string()))
|
||||
} else {
|
||||
// url not in cache, we request it
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(10))
|
||||
.danger_accept_invalid_certs(opt_insecure)
|
||||
.build()?;
|
||||
let mut response = client.get(url).header(USER_AGENT, opt_user_agent).send()?;
|
||||
let mut response = client.get(url).send()?;
|
||||
|
||||
if !opt_silent {
|
||||
if url == response.url().as_str() {
|
||||
|
|
25
src/main.rs
25
src/main.rs
|
@ -1,6 +1,7 @@
|
|||
#[macro_use]
|
||||
extern crate clap;
|
||||
extern crate monolith;
|
||||
extern crate reqwest;
|
||||
|
||||
mod args;
|
||||
|
||||
|
@ -8,34 +9,50 @@ use args::AppArgs;
|
|||
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
||||
use monolith::http::retrieve_asset;
|
||||
use monolith::utils::is_valid_url;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
|
||||
fn main() {
|
||||
let app_args = AppArgs::get();
|
||||
let cache = &mut HashMap::new();
|
||||
if is_valid_url(app_args.url_target.as_str()) {
|
||||
// Initialize client
|
||||
let mut header_map = HeaderMap::new();
|
||||
match HeaderValue::from_str(&app_args.user_agent) {
|
||||
Ok(header) => header_map.insert(USER_AGENT, header),
|
||||
Err(err) => {
|
||||
eprintln!("Invalid user agent! {}", err);
|
||||
return;
|
||||
}
|
||||
};
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(10))
|
||||
.danger_accept_invalid_certs(app_args.insecure)
|
||||
.default_headers(header_map)
|
||||
.build()
|
||||
.expect("Failed to initialize HTTP client");
|
||||
|
||||
let (data, final_url) = retrieve_asset(
|
||||
cache,
|
||||
&client,
|
||||
app_args.url_target.as_str(),
|
||||
false,
|
||||
"",
|
||||
app_args.user_agent.as_str(),
|
||||
app_args.silent,
|
||||
app_args.insecure,
|
||||
)
|
||||
.unwrap();
|
||||
let dom = html_to_dom(&data);
|
||||
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&final_url,
|
||||
&dom.document,
|
||||
app_args.no_css,
|
||||
app_args.no_js,
|
||||
app_args.no_images,
|
||||
app_args.user_agent.as_str(),
|
||||
app_args.silent,
|
||||
app_args.insecure,
|
||||
app_args.no_frames,
|
||||
);
|
||||
|
||||
|
|
Loading…
Reference in a new issue