diff --git a/.gitignore b/.gitignore index 088ba6b..81c5833 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ Cargo.lock # These are backup files generated by rustfmt **/*.rs.bk + +# Exclude accidental HTML files +*.html diff --git a/Cargo.toml b/Cargo.toml index 65c715b..1689cc2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "monolith" -version = "2.0.18" +version = "2.0.19" authors = [ "Sunshine ", "Mahdi Robatipoor ", @@ -10,9 +10,8 @@ description = "CLI tool for saving web pages as a single HTML file" [dependencies] base64 = "0.10.1" clap = "2.33.0" -html5ever = "0.24.0" -indicatif = "0.11.0" -lazy_static = "1.3.0" -regex = "1.2.1" +html5ever = "0.24.1" +lazy_static = "1.4.0" +regex = "1.3.1" reqwest = "0.9.20" url = "2.1.0" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..64aa130 --- /dev/null +++ b/Makefile @@ -0,0 +1,16 @@ +.PHONY: all build install run test lint + +all: test build + +build: + @cargo build + +install: + @cargo install --force --path . + +test: + @cargo test + @cargo fmt --all -- --check + +lint: + @cargo fmt --all -- diff --git a/src/html.rs b/src/html.rs index 3f9f73e..33592fd 100644 --- a/src/html.rs +++ b/src/html.rs @@ -175,7 +175,7 @@ pub fn walk_and_embed_assets( if &attr.name.local == "src" { let value = attr.value.to_string(); - // Ignore images with empty source (they're hopelessly broken) + // Ignore images with empty source if value == EMPTY_STRING.clone() { continue; } @@ -203,7 +203,14 @@ pub fn walk_and_embed_assets( } "source" => { for attr in attrs_mut.iter_mut() { - if &attr.name.local == "srcset" { + let attr_name: &str = &attr.name.local; + + if attr_name == "src" { + let src_full_url: String = resolve_url(&url, &attr.value.to_string()) + .unwrap_or(attr.value.to_string()); + attr.value.clear(); + attr.value.push_slice(src_full_url.as_str()); + } else if attr_name == "srcset" { if get_node_name(&get_parent_node(&node)) == "picture" { if opt_no_images { attr.value.clear(); @@ -345,6 +352,36 @@ pub fn walk_and_embed_assets( } } } + "video" => { + for attr in attrs_mut.iter_mut() { + if &attr.name.local == "poster" { + let video_poster = attr.value.to_string(); + + // Ignore posters with empty source + if video_poster == EMPTY_STRING.clone() { + continue; + } + + if opt_no_images { + attr.value.clear(); + } else { + let poster_full_url: String = resolve_url(&url, &video_poster) + .unwrap_or(EMPTY_STRING.clone()); + let img_datauri = retrieve_asset( + &poster_full_url, + true, + "", + opt_user_agent, + opt_silent, + opt_insecure, + ) + .unwrap_or(poster_full_url); + attr.value.clear(); + attr.value.push_slice(img_datauri.as_str()); + } + } + } + } _ => {} } diff --git a/src/main.rs b/src/main.rs index b4d83e2..2c88e22 100644 --- a/src/main.rs +++ b/src/main.rs @@ -21,6 +21,7 @@ fn main() { .index(1) .help("URL to download"), ) + // .args_from_usage("-a, --include-audio 'Embed audio sources'") .args_from_usage("-c, --no-css 'Ignore styles'") .args_from_usage("-f, --no-frames 'Exclude iframes'") .args_from_usage("-i, --no-images 'Remove images'") @@ -29,6 +30,7 @@ fn main() { .args_from_usage("-k, --insecure 'Accept invalid X.509 (TLS) certificates'") .args_from_usage("-s, --silent 'Suppress verbosity'") .args_from_usage("-u, --user-agent=[Iceweasel] 'Custom User-Agent string'") + // .args_from_usage("-v, --include-video 'Embed video sources'") .get_matches(); // Process the command