Merge pull request #228 from snshn/audio-video-support
Add support for embedding video and audio files
This commit is contained in:
commit
0533b287b7
4 changed files with 206 additions and 13 deletions
|
@ -54,6 +54,7 @@ The guide can be found [here](docs/containers.md)
|
|||
---------------------------------------------------
|
||||
|
||||
## Options
|
||||
- `-a`: Exclude audio sources
|
||||
- `-b`: Use custom base URL
|
||||
- `-c`: Exclude CSS
|
||||
- `-e`: Ignore network errors
|
||||
|
@ -68,6 +69,7 @@ The guide can be found [here](docs/containers.md)
|
|||
- `-s`: Be quiet
|
||||
- `-t`: Adjust network request timeout
|
||||
- `-u`: Provide custom User-Agent
|
||||
- `-v`: Exclude video sources
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
|
199
src/html.rs
199
src/html.rs
|
@ -759,7 +759,7 @@ pub fn walk_and_embed_assets(
|
|||
}
|
||||
}
|
||||
"img" => {
|
||||
// Find source attribute(s)
|
||||
// Find src and data-src attribute(s)
|
||||
let img_attr_src_value: Option<String> = get_node_attr(node, "src");
|
||||
let img_attr_data_src_value: Option<String> = get_node_attr(node, "data-src");
|
||||
|
||||
|
@ -959,14 +959,101 @@ pub fn walk_and_embed_assets(
|
|||
}
|
||||
}
|
||||
"source" => {
|
||||
let parent_node = get_parent_node(node);
|
||||
let parent_node_name: &str = get_node_name(&parent_node).unwrap_or_default();
|
||||
|
||||
if let Some(source_attr_src_value) = get_node_attr(node, "src") {
|
||||
let src_full_url: String = resolve_url(&url, source_attr_src_value.clone())
|
||||
.unwrap_or_else(|_| source_attr_src_value.to_string());
|
||||
set_node_attr(node, "src", Some(src_full_url));
|
||||
if parent_node_name == "audio" {
|
||||
if options.no_audio {
|
||||
set_node_attr(node, "src", None);
|
||||
} else {
|
||||
let src_full_url: String =
|
||||
resolve_url(&url, source_attr_src_value.clone())
|
||||
.unwrap_or_else(|_| source_attr_src_value.to_string());
|
||||
let src_url_fragment = get_url_fragment(src_full_url.clone());
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&src_full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((src_data, src_final_url, src_media_type)) => {
|
||||
let src_data_url = data_to_data_url(
|
||||
&src_media_type,
|
||||
&src_data,
|
||||
&src_final_url,
|
||||
);
|
||||
let assembled_url: String = url_with_fragment(
|
||||
src_data_url.as_str(),
|
||||
src_url_fragment.as_str(),
|
||||
);
|
||||
set_node_attr(node, "src", Some(assembled_url));
|
||||
}
|
||||
Err(_) => {
|
||||
if is_http_url(src_full_url.clone()) {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
let assembled_url: String = url_with_fragment(
|
||||
src_full_url.as_str(),
|
||||
src_url_fragment.as_str(),
|
||||
);
|
||||
set_node_attr(node, "src", Some(assembled_url));
|
||||
} else {
|
||||
// Exclude non-remote URLs
|
||||
set_node_attr(node, "src", None);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if parent_node_name == "video" {
|
||||
if options.no_video {
|
||||
set_node_attr(node, "src", None);
|
||||
} else {
|
||||
let src_full_url: String =
|
||||
resolve_url(&url, source_attr_src_value.clone())
|
||||
.unwrap_or_else(|_| source_attr_src_value.to_string());
|
||||
let src_url_fragment = get_url_fragment(src_full_url.clone());
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&src_full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((src_data, src_final_url, src_media_type)) => {
|
||||
let src_data_url = data_to_data_url(
|
||||
&src_media_type,
|
||||
&src_data,
|
||||
&src_final_url,
|
||||
);
|
||||
let assembled_url: String = url_with_fragment(
|
||||
src_data_url.as_str(),
|
||||
src_url_fragment.as_str(),
|
||||
);
|
||||
set_node_attr(node, "src", Some(assembled_url));
|
||||
}
|
||||
Err(_) => {
|
||||
if is_http_url(src_full_url.clone()) {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
let assembled_url: String = url_with_fragment(
|
||||
src_full_url.as_str(),
|
||||
src_url_fragment.as_str(),
|
||||
);
|
||||
set_node_attr(node, "src", Some(assembled_url));
|
||||
} else {
|
||||
// Exclude non-remote URLs
|
||||
set_node_attr(node, "src", None);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(source_attr_srcset_value) = get_node_attr(node, "srcset") {
|
||||
if get_node_name(&get_parent_node(&node)) == Some("picture") {
|
||||
if parent_node_name == "picture" {
|
||||
if options.no_images {
|
||||
set_node_attr(node, "srcset", Some(str!(empty_image!())));
|
||||
} else {
|
||||
|
@ -994,13 +1081,16 @@ pub fn walk_and_embed_assets(
|
|||
set_node_attr(node, "srcset", Some(assembled_url));
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(srcset_full_url.clone()) {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
let assembled_url: String = url_with_fragment(
|
||||
srcset_full_url.as_str(),
|
||||
srcset_url_fragment.as_str(),
|
||||
);
|
||||
set_node_attr(node, "srcset", Some(assembled_url));
|
||||
} else {
|
||||
// Exclude non-remote URLs
|
||||
set_node_attr(node, "srcset", None);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1192,7 +1282,99 @@ pub fn walk_and_embed_assets(
|
|||
}
|
||||
}
|
||||
}
|
||||
"audio" => {
|
||||
if let Some(audio_attr_src_value) = get_node_attr(node, "src") {
|
||||
if options.no_audio {
|
||||
set_node_attr(node, "src", None);
|
||||
} else {
|
||||
let src_full_url: String =
|
||||
resolve_url(&url, audio_attr_src_value.clone())
|
||||
.unwrap_or_else(|_| audio_attr_src_value.to_string());
|
||||
let src_url_fragment = get_url_fragment(src_full_url.clone());
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&src_full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((src_data, src_final_url, src_media_type)) => {
|
||||
let src_data_url = data_to_data_url(
|
||||
&src_media_type,
|
||||
&src_data,
|
||||
&src_final_url,
|
||||
);
|
||||
let assembled_url: String = url_with_fragment(
|
||||
src_data_url.as_str(),
|
||||
src_url_fragment.as_str(),
|
||||
);
|
||||
set_node_attr(node, "src", Some(assembled_url));
|
||||
}
|
||||
Err(_) => {
|
||||
if is_http_url(src_full_url.clone()) {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
let assembled_url: String = url_with_fragment(
|
||||
src_full_url.as_str(),
|
||||
src_url_fragment.as_str(),
|
||||
);
|
||||
set_node_attr(node, "src", Some(assembled_url));
|
||||
} else {
|
||||
// Exclude non-remote URLs
|
||||
set_node_attr(node, "src", None);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"video" => {
|
||||
if let Some(video_attr_src_value) = get_node_attr(node, "src") {
|
||||
if options.no_video {
|
||||
set_node_attr(node, "src", None);
|
||||
} else {
|
||||
let src_full_url: String =
|
||||
resolve_url(&url, video_attr_src_value.clone())
|
||||
.unwrap_or_else(|_| video_attr_src_value.to_string());
|
||||
let src_url_fragment = get_url_fragment(src_full_url.clone());
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&src_full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((src_data, src_final_url, src_media_type)) => {
|
||||
let src_data_url = data_to_data_url(
|
||||
&src_media_type,
|
||||
&src_data,
|
||||
&src_final_url,
|
||||
);
|
||||
let assembled_url: String = url_with_fragment(
|
||||
src_data_url.as_str(),
|
||||
src_url_fragment.as_str(),
|
||||
);
|
||||
set_node_attr(node, "src", Some(assembled_url));
|
||||
}
|
||||
Err(_) => {
|
||||
if is_http_url(src_full_url.clone()) {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
let assembled_url: String = url_with_fragment(
|
||||
src_full_url.as_str(),
|
||||
src_url_fragment.as_str(),
|
||||
);
|
||||
set_node_attr(node, "src", Some(assembled_url));
|
||||
} else {
|
||||
// Exclude non-remote URLs
|
||||
set_node_attr(node, "src", None);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Embed poster images
|
||||
if let Some(video_attr_poster_value) = get_node_attr(node, "poster") {
|
||||
// Skip posters with empty source
|
||||
if !video_attr_poster_value.is_empty() {
|
||||
|
@ -1228,13 +1410,16 @@ pub fn walk_and_embed_assets(
|
|||
set_node_attr(node, "poster", Some(assembled_url));
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(video_poster_full_url.clone()) {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
let assembled_url: String = url_with_fragment(
|
||||
video_poster_full_url.as_str(),
|
||||
video_poster_url_fragment.as_str(),
|
||||
);
|
||||
set_node_attr(node, "poster", Some(assembled_url));
|
||||
} else {
|
||||
// Get rid of poster attribute if the URL is not remote
|
||||
set_node_attr(node, "poster", None);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
16
src/opts.rs
16
src/opts.rs
|
@ -2,6 +2,7 @@ use clap::{App, Arg};
|
|||
|
||||
#[derive(Default)]
|
||||
pub struct Options {
|
||||
pub no_audio: bool,
|
||||
pub base_url: Option<String>,
|
||||
pub no_css: bool,
|
||||
pub ignore_errors: bool,
|
||||
|
@ -16,6 +17,7 @@ pub struct Options {
|
|||
pub silent: bool,
|
||||
pub timeout: u64,
|
||||
pub user_agent: String,
|
||||
pub no_video: bool,
|
||||
pub target: String,
|
||||
}
|
||||
|
||||
|
@ -38,8 +40,8 @@ impl Options {
|
|||
.version(crate_version!())
|
||||
.author(crate_authors!("\n"))
|
||||
.about(format!("{}\n{}", ASCII, crate_description!()).as_str())
|
||||
// .args_from_usage("-a, --no-audio 'Removes audio sources'")
|
||||
.args_from_usage("-b, --base-url=[http://localhost/] 'Use custom base URL'")
|
||||
.args_from_usage("-a, --no-audio 'Removes audio sources'")
|
||||
.args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
|
||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
|
||||
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||
|
@ -49,11 +51,11 @@ impl Options {
|
|||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
|
||||
.args_from_usage("-o, --output=[document.html] 'Write output to <file>'")
|
||||
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
|
||||
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
|
||||
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
|
||||
// .args_from_usage("-v, --no-video 'Removes video sources'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
|
||||
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
|
||||
.args_from_usage("-v, --no-video 'Removes video sources'")
|
||||
.arg(
|
||||
Arg::with_name("target")
|
||||
.required(true)
|
||||
|
@ -69,6 +71,7 @@ impl Options {
|
|||
.value_of("target")
|
||||
.expect("please set target")
|
||||
.to_string();
|
||||
options.no_audio = app.is_present("no-audio");
|
||||
if let Some(base_url) = app.value_of("base-url") {
|
||||
options.base_url = Some(str!(base_url));
|
||||
}
|
||||
|
@ -92,6 +95,7 @@ impl Options {
|
|||
.value_of("user-agent")
|
||||
.unwrap_or(DEFAULT_USER_AGENT)
|
||||
.to_string();
|
||||
options.no_video = app.is_present("no-video");
|
||||
|
||||
options
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ mod passing {
|
|||
let options: Options = Options::default();
|
||||
|
||||
assert_eq!(options.target, str!());
|
||||
assert_eq!(options.no_audio, false);
|
||||
assert_eq!(options.no_css, false);
|
||||
assert_eq!(options.no_frames, false);
|
||||
assert_eq!(options.no_fonts, false);
|
||||
|
@ -26,5 +27,6 @@ mod passing {
|
|||
assert_eq!(options.silent, false);
|
||||
assert_eq!(options.timeout, 0);
|
||||
assert_eq!(options.user_agent, "");
|
||||
assert_eq!(options.no_video, false);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue