add support for embedding video and audio files

This commit is contained in:
Sunshine 2020-12-25 15:55:52 -10:00
parent 2b9caf9840
commit 4ba4285b6b
No known key found for this signature in database
GPG key ID: B80CA68703CD8AB1
4 changed files with 206 additions and 13 deletions

View file

@ -54,6 +54,7 @@ The guide can be found [here](docs/containers.md)
--------------------------------------------------- ---------------------------------------------------
## Options ## Options
- `-a`: Exclude audio sources
- `-b`: Use custom base URL - `-b`: Use custom base URL
- `-c`: Exclude CSS - `-c`: Exclude CSS
- `-e`: Ignore network errors - `-e`: Ignore network errors
@ -68,6 +69,7 @@ The guide can be found [here](docs/containers.md)
- `-s`: Be quiet - `-s`: Be quiet
- `-t`: Adjust network request timeout - `-t`: Adjust network request timeout
- `-u`: Provide custom User-Agent - `-u`: Provide custom User-Agent
- `-v`: Exclude video sources
--------------------------------------------------- ---------------------------------------------------

View file

@ -759,7 +759,7 @@ pub fn walk_and_embed_assets(
} }
} }
"img" => { "img" => {
// Find source attribute(s) // Find src and data-src attribute(s)
let img_attr_src_value: Option<String> = get_node_attr(node, "src"); let img_attr_src_value: Option<String> = get_node_attr(node, "src");
let img_attr_data_src_value: Option<String> = get_node_attr(node, "data-src"); let img_attr_data_src_value: Option<String> = get_node_attr(node, "data-src");
@ -959,14 +959,101 @@ pub fn walk_and_embed_assets(
} }
} }
"source" => { "source" => {
let parent_node = get_parent_node(node);
let parent_node_name: &str = get_node_name(&parent_node).unwrap_or_default();
if let Some(source_attr_src_value) = get_node_attr(node, "src") { if let Some(source_attr_src_value) = get_node_attr(node, "src") {
let src_full_url: String = resolve_url(&url, source_attr_src_value.clone()) if parent_node_name == "audio" {
.unwrap_or_else(|_| source_attr_src_value.to_string()); if options.no_audio {
set_node_attr(node, "src", Some(src_full_url)); set_node_attr(node, "src", None);
} else {
let src_full_url: String =
resolve_url(&url, source_attr_src_value.clone())
.unwrap_or_else(|_| source_attr_src_value.to_string());
let src_url_fragment = get_url_fragment(src_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&src_full_url,
options,
depth + 1,
) {
Ok((src_data, src_final_url, src_media_type)) => {
let src_data_url = data_to_data_url(
&src_media_type,
&src_data,
&src_final_url,
);
let assembled_url: String = url_with_fragment(
src_data_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
}
Err(_) => {
if is_http_url(src_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment(
src_full_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
} else {
// Exclude non-remote URLs
set_node_attr(node, "src", None);
}
}
}
}
} else if parent_node_name == "video" {
if options.no_video {
set_node_attr(node, "src", None);
} else {
let src_full_url: String =
resolve_url(&url, source_attr_src_value.clone())
.unwrap_or_else(|_| source_attr_src_value.to_string());
let src_url_fragment = get_url_fragment(src_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&src_full_url,
options,
depth + 1,
) {
Ok((src_data, src_final_url, src_media_type)) => {
let src_data_url = data_to_data_url(
&src_media_type,
&src_data,
&src_final_url,
);
let assembled_url: String = url_with_fragment(
src_data_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
}
Err(_) => {
if is_http_url(src_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment(
src_full_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
} else {
// Exclude non-remote URLs
set_node_attr(node, "src", None);
}
}
}
}
}
} }
if let Some(source_attr_srcset_value) = get_node_attr(node, "srcset") { if let Some(source_attr_srcset_value) = get_node_attr(node, "srcset") {
if get_node_name(&get_parent_node(&node)) == Some("picture") { if parent_node_name == "picture" {
if options.no_images { if options.no_images {
set_node_attr(node, "srcset", Some(str!(empty_image!()))); set_node_attr(node, "srcset", Some(str!(empty_image!())));
} else { } else {
@ -994,13 +1081,16 @@ pub fn walk_and_embed_assets(
set_node_attr(node, "srcset", Some(assembled_url)); set_node_attr(node, "srcset", Some(assembled_url));
} }
Err(_) => { Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(srcset_full_url.clone()) { if is_http_url(srcset_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment( let assembled_url: String = url_with_fragment(
srcset_full_url.as_str(), srcset_full_url.as_str(),
srcset_url_fragment.as_str(), srcset_url_fragment.as_str(),
); );
set_node_attr(node, "srcset", Some(assembled_url)); set_node_attr(node, "srcset", Some(assembled_url));
} else {
// Exclude non-remote URLs
set_node_attr(node, "srcset", None);
} }
} }
} }
@ -1192,7 +1282,99 @@ pub fn walk_and_embed_assets(
} }
} }
} }
"audio" => {
if let Some(audio_attr_src_value) = get_node_attr(node, "src") {
if options.no_audio {
set_node_attr(node, "src", None);
} else {
let src_full_url: String =
resolve_url(&url, audio_attr_src_value.clone())
.unwrap_or_else(|_| audio_attr_src_value.to_string());
let src_url_fragment = get_url_fragment(src_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&src_full_url,
options,
depth + 1,
) {
Ok((src_data, src_final_url, src_media_type)) => {
let src_data_url = data_to_data_url(
&src_media_type,
&src_data,
&src_final_url,
);
let assembled_url: String = url_with_fragment(
src_data_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
}
Err(_) => {
if is_http_url(src_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment(
src_full_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
} else {
// Exclude non-remote URLs
set_node_attr(node, "src", None);
}
}
}
}
}
}
"video" => { "video" => {
if let Some(video_attr_src_value) = get_node_attr(node, "src") {
if options.no_video {
set_node_attr(node, "src", None);
} else {
let src_full_url: String =
resolve_url(&url, video_attr_src_value.clone())
.unwrap_or_else(|_| video_attr_src_value.to_string());
let src_url_fragment = get_url_fragment(src_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&src_full_url,
options,
depth + 1,
) {
Ok((src_data, src_final_url, src_media_type)) => {
let src_data_url = data_to_data_url(
&src_media_type,
&src_data,
&src_final_url,
);
let assembled_url: String = url_with_fragment(
src_data_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
}
Err(_) => {
if is_http_url(src_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment(
src_full_url.as_str(),
src_url_fragment.as_str(),
);
set_node_attr(node, "src", Some(assembled_url));
} else {
// Exclude non-remote URLs
set_node_attr(node, "src", None);
}
}
}
}
}
// Embed poster images
if let Some(video_attr_poster_value) = get_node_attr(node, "poster") { if let Some(video_attr_poster_value) = get_node_attr(node, "poster") {
// Skip posters with empty source // Skip posters with empty source
if !video_attr_poster_value.is_empty() { if !video_attr_poster_value.is_empty() {
@ -1228,13 +1410,16 @@ pub fn walk_and_embed_assets(
set_node_attr(node, "poster", Some(assembled_url)); set_node_attr(node, "poster", Some(assembled_url));
} }
Err(_) => { Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(video_poster_full_url.clone()) { if is_http_url(video_poster_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment( let assembled_url: String = url_with_fragment(
video_poster_full_url.as_str(), video_poster_full_url.as_str(),
video_poster_url_fragment.as_str(), video_poster_url_fragment.as_str(),
); );
set_node_attr(node, "poster", Some(assembled_url)); set_node_attr(node, "poster", Some(assembled_url));
} else {
// Get rid of poster attribute if the URL is not remote
set_node_attr(node, "poster", None);
} }
} }
} }

View file

@ -2,6 +2,7 @@ use clap::{App, Arg};
#[derive(Default)] #[derive(Default)]
pub struct Options { pub struct Options {
pub no_audio: bool,
pub base_url: Option<String>, pub base_url: Option<String>,
pub no_css: bool, pub no_css: bool,
pub ignore_errors: bool, pub ignore_errors: bool,
@ -16,6 +17,7 @@ pub struct Options {
pub silent: bool, pub silent: bool,
pub timeout: u64, pub timeout: u64,
pub user_agent: String, pub user_agent: String,
pub no_video: bool,
pub target: String, pub target: String,
} }
@ -38,8 +40,8 @@ impl Options {
.version(crate_version!()) .version(crate_version!())
.author(crate_authors!("\n")) .author(crate_authors!("\n"))
.about(format!("{}\n{}", ASCII, crate_description!()).as_str()) .about(format!("{}\n{}", ASCII, crate_description!()).as_str())
// .args_from_usage("-a, --no-audio 'Removes audio sources'") .args_from_usage("-a, --no-audio 'Removes audio sources'")
.args_from_usage("-b, --base-url=[http://localhost/] 'Use custom base URL'") .args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
.args_from_usage("-c, --no-css 'Removes CSS'") .args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-e, --ignore-errors 'Ignore network errors'") .args_from_usage("-e, --ignore-errors 'Ignore network errors'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'") .args_from_usage("-f, --no-frames 'Removes frames and iframes'")
@ -49,11 +51,11 @@ impl Options {
.args_from_usage("-j, --no-js 'Removes JavaScript'") .args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'") .args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'") .args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
.args_from_usage("-o, --output=[document.html] 'Write output to <file>'") .args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'") .args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'") .args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'") .args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
// .args_from_usage("-v, --no-video 'Removes video sources'") .args_from_usage("-v, --no-video 'Removes video sources'")
.arg( .arg(
Arg::with_name("target") Arg::with_name("target")
.required(true) .required(true)
@ -69,6 +71,7 @@ impl Options {
.value_of("target") .value_of("target")
.expect("please set target") .expect("please set target")
.to_string(); .to_string();
options.no_audio = app.is_present("no-audio");
if let Some(base_url) = app.value_of("base-url") { if let Some(base_url) = app.value_of("base-url") {
options.base_url = Some(str!(base_url)); options.base_url = Some(str!(base_url));
} }
@ -92,6 +95,7 @@ impl Options {
.value_of("user-agent") .value_of("user-agent")
.unwrap_or(DEFAULT_USER_AGENT) .unwrap_or(DEFAULT_USER_AGENT)
.to_string(); .to_string();
options.no_video = app.is_present("no-video");
options options
} }

View file

@ -14,6 +14,7 @@ mod passing {
let options: Options = Options::default(); let options: Options = Options::default();
assert_eq!(options.target, str!()); assert_eq!(options.target, str!());
assert_eq!(options.no_audio, false);
assert_eq!(options.no_css, false); assert_eq!(options.no_css, false);
assert_eq!(options.no_frames, false); assert_eq!(options.no_frames, false);
assert_eq!(options.no_fonts, false); assert_eq!(options.no_fonts, false);
@ -26,5 +27,6 @@ mod passing {
assert_eq!(options.silent, false); assert_eq!(options.silent, false);
assert_eq!(options.timeout, 0); assert_eq!(options.timeout, 0);
assert_eq!(options.user_agent, ""); assert_eq!(options.user_agent, "");
assert_eq!(options.no_video, false);
} }
} }