Add support for embedding video and audio files

This commit is contained in:
Sunshine 2020-12-25 15:55:52 -10:00
parent 2b9caf9840
commit 4ba4285b6b
No known key found for this signature in database
GPG key ID: B80CA68703CD8AB1
4 changed files with 206 additions and 13 deletions

View file

@ -54,6 +54,7 @@ The guide can be found [here](docs/containers.md)
---------------------------------------------------
## Options
- `-a`: Exclude audio sources
- `-b`: Use custom base URL
- `-c`: Exclude CSS
- `-e`: Ignore network errors
@ -68,6 +69,7 @@ The guide can be found [here](docs/containers.md)
- `-s`: Be quiet
- `-t`: Adjust network request timeout
- `-u`: Provide custom User-Agent
- `-v`: Exclude video sources
---------------------------------------------------

View file

@ -759,7 +759,7 @@ pub fn walk_and_embed_assets(
}
}
"img" => {
// Find source attribute(s)
// Find src and data-src attribute(s)
let img_attr_src_value: Option<String> = get_node_attr(node, "src");
let img_attr_data_src_value: Option<String> = get_node_attr(node, "data-src");
@ -959,14 +959,101 @@ pub fn walk_and_embed_assets(
}
}
"source" => {
let parent_node = get_parent_node(node);
let parent_node_name: &str = get_node_name(&parent_node).unwrap_or_default();
if let Some(source_attr_src_value) = get_node_attr(node, "src") {
let src_full_url: String = resolve_url(&url, source_attr_src_value.clone())
if parent_node_name == "audio" {
if options.no_audio {
set_node_attr(node, "src", None);
} else {
let src_full_url: String =
resolve_url(&url, source_attr_src_value.clone())
.unwrap_or_else(|_| source_attr_src_value.to_string());
set_node_attr(node, "src", Some(src_full_url));
let src_url_fragment = get_url_fragment(src_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&src_full_url,
options,
depth + 1,
) {
Ok((src_data, src_final_url, src_media_type)) => {
let src_data_url = data_to_data_url(
&src_media_type,
&src_data,
&src_final_url,
);
let assembled_url: String = url_with_fragment(
src_data_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
}
Err(_) => {
if is_http_url(src_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment(
src_full_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
} else {
// Exclude non-remote URLs
set_node_attr(node, "src", None);
}
}
}
}
} else if parent_node_name == "video" {
if options.no_video {
set_node_attr(node, "src", None);
} else {
let src_full_url: String =
resolve_url(&url, source_attr_src_value.clone())
.unwrap_or_else(|_| source_attr_src_value.to_string());
let src_url_fragment = get_url_fragment(src_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&src_full_url,
options,
depth + 1,
) {
Ok((src_data, src_final_url, src_media_type)) => {
let src_data_url = data_to_data_url(
&src_media_type,
&src_data,
&src_final_url,
);
let assembled_url: String = url_with_fragment(
src_data_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
}
Err(_) => {
if is_http_url(src_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment(
src_full_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
} else {
// Exclude non-remote URLs
set_node_attr(node, "src", None);
}
}
}
}
}
}
if let Some(source_attr_srcset_value) = get_node_attr(node, "srcset") {
if get_node_name(&get_parent_node(&node)) == Some("picture") {
if parent_node_name == "picture" {
if options.no_images {
set_node_attr(node, "srcset", Some(str!(empty_image!())));
} else {
@ -994,13 +1081,16 @@ pub fn walk_and_embed_assets(
set_node_attr(node, "srcset", Some(assembled_url));
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(srcset_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment(
srcset_full_url.as_str(),
srcset_url_fragment.as_str(),
);
set_node_attr(node, "srcset", Some(assembled_url));
} else {
// Exclude non-remote URLs
set_node_attr(node, "srcset", None);
}
}
}
@ -1192,7 +1282,99 @@ pub fn walk_and_embed_assets(
}
}
}
"audio" => {
if let Some(audio_attr_src_value) = get_node_attr(node, "src") {
if options.no_audio {
set_node_attr(node, "src", None);
} else {
let src_full_url: String =
resolve_url(&url, audio_attr_src_value.clone())
.unwrap_or_else(|_| audio_attr_src_value.to_string());
let src_url_fragment = get_url_fragment(src_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&src_full_url,
options,
depth + 1,
) {
Ok((src_data, src_final_url, src_media_type)) => {
let src_data_url = data_to_data_url(
&src_media_type,
&src_data,
&src_final_url,
);
let assembled_url: String = url_with_fragment(
src_data_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
}
Err(_) => {
if is_http_url(src_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment(
src_full_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
} else {
// Exclude non-remote URLs
set_node_attr(node, "src", None);
}
}
}
}
}
}
"video" => {
if let Some(video_attr_src_value) = get_node_attr(node, "src") {
if options.no_video {
set_node_attr(node, "src", None);
} else {
let src_full_url: String =
resolve_url(&url, video_attr_src_value.clone())
.unwrap_or_else(|_| video_attr_src_value.to_string());
let src_url_fragment = get_url_fragment(src_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&src_full_url,
options,
depth + 1,
) {
Ok((src_data, src_final_url, src_media_type)) => {
let src_data_url = data_to_data_url(
&src_media_type,
&src_data,
&src_final_url,
);
let assembled_url: String = url_with_fragment(
src_data_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
}
Err(_) => {
if is_http_url(src_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment(
src_full_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
} else {
// Exclude non-remote URLs
set_node_attr(node, "src", None);
}
}
}
}
}
// Embed poster images
if let Some(video_attr_poster_value) = get_node_attr(node, "poster") {
// Skip posters with empty source
if !video_attr_poster_value.is_empty() {
@ -1228,13 +1410,16 @@ pub fn walk_and_embed_assets(
set_node_attr(node, "poster", Some(assembled_url));
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(video_poster_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment(
video_poster_full_url.as_str(),
video_poster_url_fragment.as_str(),
);
set_node_attr(node, "poster", Some(assembled_url));
} else {
// Get rid of poster attribute if the URL is not remote
set_node_attr(node, "poster", None);
}
}
}

View file

@ -2,6 +2,7 @@ use clap::{App, Arg};
#[derive(Default)]
pub struct Options {
pub no_audio: bool,
pub base_url: Option<String>,
pub no_css: bool,
pub ignore_errors: bool,
@ -16,6 +17,7 @@ pub struct Options {
pub silent: bool,
pub timeout: u64,
pub user_agent: String,
pub no_video: bool,
pub target: String,
}
@ -38,8 +40,8 @@ impl Options {
.version(crate_version!())
.author(crate_authors!("\n"))
.about(format!("{}\n{}", ASCII, crate_description!()).as_str())
// .args_from_usage("-a, --no-audio 'Removes audio sources'")
.args_from_usage("-b, --base-url=[http://localhost/] 'Use custom base URL'")
.args_from_usage("-a, --no-audio 'Removes audio sources'")
.args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
.args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
@ -49,11 +51,11 @@ impl Options {
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
.args_from_usage("-o, --output=[document.html] 'Write output to <file>'")
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
// .args_from_usage("-v, --no-video 'Removes video sources'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
.args_from_usage("-v, --no-video 'Removes video sources'")
.arg(
Arg::with_name("target")
.required(true)
@ -69,6 +71,7 @@ impl Options {
.value_of("target")
.expect("please set target")
.to_string();
options.no_audio = app.is_present("no-audio");
if let Some(base_url) = app.value_of("base-url") {
options.base_url = Some(str!(base_url));
}
@ -92,6 +95,7 @@ impl Options {
.value_of("user-agent")
.unwrap_or(DEFAULT_USER_AGENT)
.to_string();
options.no_video = app.is_present("no-video");
options
}

View file

@ -14,6 +14,7 @@ mod passing {
let options: Options = Options::default();
assert_eq!(options.target, str!());
assert_eq!(options.no_audio, false);
assert_eq!(options.no_css, false);
assert_eq!(options.no_frames, false);
assert_eq!(options.no_fonts, false);
@ -26,5 +27,6 @@ mod passing {
assert_eq!(options.silent, false);
assert_eq!(options.timeout, 0);
assert_eq!(options.user_agent, "");
assert_eq!(options.no_video, false);
}
}