Merge pull request #219 from snshn/ignore-network-errors-option
Account for network errors
This commit is contained in:
commit
5ac520b4da
11 changed files with 109 additions and 70 deletions
|
@ -54,9 +54,10 @@ The guide can be found [here](docs/containers.md)
|
|||
---------------------------------------------------
|
||||
|
||||
## Options
|
||||
- `-c`: Ignore styles
|
||||
- `-f`: Exclude frames
|
||||
- `-F`: Omit web fonts
|
||||
- `-c`: Exclude CSS
|
||||
- `-e`: Ignore network errors
|
||||
- `-f`: Omit frames
|
||||
- `-F`: Exclude web fonts
|
||||
- `-i`: Remove images
|
||||
- `-I`: Isolate the document
|
||||
- `-j`: Exclude JavaScript
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# 2. Network request timeout
|
||||
# 3. Network request timeout
|
||||
|
||||
Date: 2020-02-15
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# 4. Asset Minimization
|
||||
# 5. Asset Minimization
|
||||
|
||||
Date: 2020-03-14
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# 4. Reload and location `meta` tags
|
||||
# 6. Reload and location `meta` tags
|
||||
|
||||
Date: 2020-06-25
|
||||
|
||||
|
|
19
docs/arch/0007-network-errors.md
Normal file
19
docs/arch/0007-network-errors.md
Normal file
|
@ -0,0 +1,19 @@
|
|||
# 7. Network errors
|
||||
|
||||
Date: 2020-11-22
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
Servers may return information with HTTP response codes other than `200`; however, those responses may still contain useful data.
|
||||
|
||||
## Decision
|
||||
|
||||
Fail by default, notifying the user of the network error. Add an option to continue retrieving assets by treating all response codes as `200`.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will fail to obtain resources whose HTTP response status is anything other than `200`, unless told to ignore network errors.
|
16
src/css.rs
16
src/css.rs
|
@ -176,7 +176,7 @@ pub fn process_css<'a>(
|
|||
client,
|
||||
&parent_url,
|
||||
&import_full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((import_contents, import_final_url, _import_media_type)) => {
|
||||
|
@ -227,7 +227,7 @@ pub fn process_css<'a>(
|
|||
client,
|
||||
&parent_url,
|
||||
&resolved_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
|
@ -315,14 +315,8 @@ pub fn process_css<'a>(
|
|||
if is_import {
|
||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let url_fragment = get_url_fragment(full_url.clone());
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&parent_url,
|
||||
&full_url,
|
||||
options.silent,
|
||||
depth + 1,
|
||||
) {
|
||||
match retrieve_asset(cache, client, &parent_url, &full_url, options, depth + 1)
|
||||
{
|
||||
Ok((css, final_url, _media_type)) => {
|
||||
let data_url = data_to_data_url(
|
||||
"text/css",
|
||||
|
@ -361,7 +355,7 @@ pub fn process_css<'a>(
|
|||
client,
|
||||
&parent_url,
|
||||
&full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
|
|
22
src/html.rs
22
src/html.rs
|
@ -148,7 +148,7 @@ pub fn embed_srcset(
|
|||
client,
|
||||
&parent_url,
|
||||
&image_full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((image_data, image_final_url, image_media_type)) => {
|
||||
|
@ -539,7 +539,7 @@ pub fn walk_and_embed_assets(
|
|||
client,
|
||||
&url,
|
||||
&link_href_full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((
|
||||
|
@ -612,7 +612,7 @@ pub fn walk_and_embed_assets(
|
|||
client,
|
||||
&url,
|
||||
&link_href_full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((
|
||||
|
@ -723,7 +723,7 @@ pub fn walk_and_embed_assets(
|
|||
client,
|
||||
&url,
|
||||
&background_full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((background_data, background_final_url, background_media_type)) => {
|
||||
|
@ -807,7 +807,7 @@ pub fn walk_and_embed_assets(
|
|||
client,
|
||||
&url,
|
||||
&img_full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((img_data, img_final_url, img_media_type)) => {
|
||||
|
@ -898,7 +898,7 @@ pub fn walk_and_embed_assets(
|
|||
client,
|
||||
&url,
|
||||
&input_image_full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((
|
||||
|
@ -961,7 +961,7 @@ pub fn walk_and_embed_assets(
|
|||
client,
|
||||
&url,
|
||||
&image_full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((image_data, image_final_url, image_media_type)) => {
|
||||
|
@ -1020,7 +1020,7 @@ pub fn walk_and_embed_assets(
|
|||
client,
|
||||
&url,
|
||||
&srcset_full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((srcset_data, srcset_final_url, srcset_media_type)) => {
|
||||
|
@ -1103,7 +1103,7 @@ pub fn walk_and_embed_assets(
|
|||
client,
|
||||
&url,
|
||||
&script_full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((script_data, script_final_url, _script_media_type)) => {
|
||||
|
@ -1196,7 +1196,7 @@ pub fn walk_and_embed_assets(
|
|||
client,
|
||||
&url,
|
||||
&frame_full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((frame_data, frame_final_url, frame_media_type)) => {
|
||||
|
@ -1269,7 +1269,7 @@ pub fn walk_and_embed_assets(
|
|||
client,
|
||||
&url,
|
||||
&video_poster_full_url,
|
||||
options.silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((
|
||||
|
|
15
src/main.rs
15
src/main.rs
|
@ -113,20 +113,15 @@ fn main() {
|
|||
|
||||
// Retrieve target document
|
||||
if is_file_url(target_url) || is_http_url(target_url) {
|
||||
match retrieve_asset(
|
||||
&mut cache,
|
||||
&client,
|
||||
target_url,
|
||||
target_url,
|
||||
options.silent,
|
||||
0,
|
||||
) {
|
||||
match retrieve_asset(&mut cache, &client, target_url, target_url, &options, 0) {
|
||||
Ok((data, final_url, _media_type)) => {
|
||||
base_url = final_url;
|
||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||
}
|
||||
Err(_) => {
|
||||
eprintln!("Could not retrieve target document");
|
||||
if !options.silent {
|
||||
eprintln!("Could not retrieve target document");
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
|
@ -159,7 +154,7 @@ fn main() {
|
|||
&client,
|
||||
&base_url,
|
||||
&favicon_ico_url,
|
||||
options.silent,
|
||||
&options,
|
||||
0,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
|
|
|
@ -4,6 +4,7 @@ use clap::{App, Arg};
|
|||
pub struct Options {
|
||||
pub target: String,
|
||||
pub no_css: bool,
|
||||
pub ignore_errors: bool,
|
||||
pub no_fonts: bool,
|
||||
pub no_frames: bool,
|
||||
pub no_images: bool,
|
||||
|
@ -45,6 +46,7 @@ impl Options {
|
|||
)
|
||||
// .args_from_usage("-a, --no-audio 'Removes audio sources'")
|
||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
|
||||
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||
.args_from_usage("-F, --no-fonts 'Removes fonts'")
|
||||
.args_from_usage("-i, --no-images 'Removes images'")
|
||||
|
@ -66,6 +68,7 @@ impl Options {
|
|||
.expect("please set target")
|
||||
.to_string();
|
||||
options.no_css = app.is_present("no-css");
|
||||
options.ignore_errors = app.is_present("ignore-errors");
|
||||
options.no_frames = app.is_present("no-frames");
|
||||
options.no_fonts = app.is_present("no-fonts");
|
||||
options.no_images = app.is_present("no-images");
|
||||
|
|
|
@ -11,6 +11,7 @@ mod passing {
|
|||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
|
||||
use crate::opts::Options;
|
||||
use crate::url;
|
||||
use crate::utils;
|
||||
|
||||
|
@ -19,6 +20,9 @@ mod passing {
|
|||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
// If both source and target are data URLs,
|
||||
// ensure the result contains target data URL
|
||||
let (data, final_url, media_type) = utils::retrieve_asset(
|
||||
|
@ -26,7 +30,7 @@ mod passing {
|
|||
&client,
|
||||
"data:text/html;base64,c291cmNl",
|
||||
"data:text/html;base64,dGFyZ2V0",
|
||||
false,
|
||||
&options,
|
||||
0,
|
||||
)
|
||||
.unwrap();
|
||||
|
@ -46,6 +50,9 @@ mod passing {
|
|||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
|
||||
// Inclusion of local assets from local sources should be allowed
|
||||
|
@ -63,7 +70,7 @@ mod passing {
|
|||
file = file_url_protocol,
|
||||
cwd = cwd.to_str().unwrap()
|
||||
),
|
||||
false,
|
||||
&options,
|
||||
0,
|
||||
)
|
||||
.unwrap();
|
||||
|
@ -91,6 +98,7 @@ mod failing {
|
|||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::opts::Options;
|
||||
use crate::utils;
|
||||
|
||||
#[test]
|
||||
|
@ -98,13 +106,16 @@ mod failing {
|
|||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
// Inclusion of local assets from data URL sources should not be allowed
|
||||
match utils::retrieve_asset(
|
||||
cache,
|
||||
&client,
|
||||
"data:text/html;base64,SoUrCe",
|
||||
"file:///etc/passwd",
|
||||
false,
|
||||
&options,
|
||||
0,
|
||||
) {
|
||||
Ok((..)) => {
|
||||
|
@ -121,13 +132,16 @@ mod failing {
|
|||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
// Inclusion of local assets from remote sources should not be allowed
|
||||
match utils::retrieve_asset(
|
||||
cache,
|
||||
&client,
|
||||
"https://kernel.org/",
|
||||
"file:///etc/passwd",
|
||||
false,
|
||||
&options,
|
||||
0,
|
||||
) {
|
||||
Ok((..)) => {
|
||||
|
|
69
src/utils.rs
69
src/utils.rs
|
@ -4,6 +4,7 @@ use std::collections::HashMap;
|
|||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::opts::Options;
|
||||
use crate::url::{clean_url, data_url_to_data, file_url_to_fs_path, is_data_url, is_file_url};
|
||||
|
||||
const INDENT: &str = " ";
|
||||
|
@ -73,7 +74,7 @@ pub fn retrieve_asset(
|
|||
client: &Client,
|
||||
parent_url: &str,
|
||||
url: &str,
|
||||
opt_silent: bool,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
|
||||
if url.len() == 0 {
|
||||
|
@ -95,7 +96,7 @@ pub fn retrieve_asset(
|
|||
let fs_file_path: String = file_url_to_fs_path(url);
|
||||
let path = Path::new(&fs_file_path);
|
||||
if path.exists() {
|
||||
if !opt_silent {
|
||||
if !options.silent {
|
||||
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||
}
|
||||
|
||||
|
@ -109,7 +110,7 @@ pub fn retrieve_asset(
|
|||
|
||||
if cache.contains_key(&cache_key) {
|
||||
// URL is in cache, we get and return it
|
||||
if !opt_silent {
|
||||
if !options.silent {
|
||||
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
|
||||
}
|
||||
|
||||
|
@ -120,34 +121,46 @@ pub fn retrieve_asset(
|
|||
))
|
||||
} else {
|
||||
// URL not in cache, we retrieve the file
|
||||
let mut response = client.get(url).send()?;
|
||||
let res_url = response.url().to_string();
|
||||
match client.get(url).send() {
|
||||
Ok(mut response) => {
|
||||
if !options.ignore_errors && response.status() != 200 {
|
||||
if !options.silent {
|
||||
eprintln!("Unable to retrieve {} ({})", &url, response.status());
|
||||
}
|
||||
// Provoke error
|
||||
return Err(client.get("").send().unwrap_err());
|
||||
}
|
||||
|
||||
if !opt_silent {
|
||||
if url == res_url {
|
||||
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||
} else {
|
||||
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url);
|
||||
let res_url = response.url().to_string();
|
||||
|
||||
if !options.silent {
|
||||
if url == res_url {
|
||||
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||
} else {
|
||||
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url);
|
||||
}
|
||||
}
|
||||
|
||||
let new_cache_key: String = clean_url(&res_url);
|
||||
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data)?;
|
||||
|
||||
// Attempt to obtain media type by reading the Content-Type header
|
||||
let media_type = response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or("");
|
||||
|
||||
// Add retrieved resource to cache
|
||||
cache.insert(new_cache_key, data.clone());
|
||||
|
||||
Ok((data, res_url, media_type.to_string()))
|
||||
}
|
||||
Err(error) => Err(error),
|
||||
}
|
||||
|
||||
let new_cache_key: String = clean_url(&res_url);
|
||||
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data)?;
|
||||
|
||||
// Attempt to obtain media type by reading the Content-Type header
|
||||
let media_type = response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or("");
|
||||
|
||||
// Add to cache
|
||||
cache.insert(new_cache_key, data.clone());
|
||||
|
||||
Ok((data, res_url, media_type.to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue