treat frames the same way as iframes

This commit is contained in:
Sunshine 2020-02-24 00:06:31 -05:00
parent 00942e0b1d
commit 7654eec7e2
No known key found for this signature in database
GPG key ID: B80CA68703CD8AB1
4 changed files with 81 additions and 10 deletions

View file

@ -35,7 +35,7 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as
## Options
- `-c`: Ignore styles
- `-f`: Exclude iframes
- `-f`: Exclude frames and iframes
- `-i`: Remove images
- `-I`: Isolate the document
- `-j`: Exclude JavaScript

View file

@ -34,7 +34,7 @@ impl AppArgs {
)
// .args_from_usage("-a, --include-audio 'Removes audio sources'")
.args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-f, --no-frames 'Removes iframes'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-i, --no-images 'Removes images'")
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
.args_from_usage("-j, --no-js 'Removes JavaScript'")

View file

@ -386,7 +386,7 @@ pub fn walk_and_embed_assets(
}
}
}
"iframe" => {
"frame" | "iframe" => {
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "src" {
if opt_no_frames {
@ -395,15 +395,15 @@ pub fn walk_and_embed_assets(
continue;
}
let iframe_src = attr.value.trim();
let frame_src = attr.value.trim();
// Ignore iframes with empty source (they cause infinite loops)
if iframe_src.is_empty() {
// Ignore (i)frames with empty source — they cause infinite loops
if frame_src.is_empty() {
continue;
}
let src_full_url = resolve_url(&url, iframe_src).unwrap_or_default();
let (iframe_data, iframe_final_url) = retrieve_asset(
let src_full_url = resolve_url(&url, frame_src).unwrap_or_default();
let (frame_data, frame_final_url) = retrieve_asset(
cache,
client,
&src_full_url,
@ -412,11 +412,11 @@ pub fn walk_and_embed_assets(
opt_silent,
)
.unwrap_or((str!(), src_full_url));
let dom = html_to_dom(&iframe_data);
let dom = html_to_dom(&frame_data);
walk_and_embed_assets(
cache,
client,
&iframe_final_url,
&frame_final_url,
&dom.document,
opt_no_css,
opt_no_js,

View file

@ -132,6 +132,42 @@ fn test_walk_and_embed_assets_ensure_no_recursive_iframe() {
);
}
/// A `<frame>` whose `src` is empty must be left as-is instead of being
/// fetched and walked: the walker skips empty frame sources because they
/// would otherwise lead to infinite recursion.
#[test]
fn test_walk_and_embed_assets_ensure_no_recursive_frame() {
    let html = "<frameset><frame src=\"\"></frameset>";
    let dom = html_to_dom(html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    // Every "exclude" switch is off, so frames are processed normally.
    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    let client = Client::new();

    walk_and_embed_assets(
        cache,
        &client,
        url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char`
    // would garble any multi-byte sequence in the output.
    let serialized = String::from_utf8(buf).expect("serialized DOM should be valid UTF-8");

    assert_eq!(
        serialized,
        "<html><head></head><frameset><frame src=\"\"></frameset></html>"
    );
}
#[test]
fn test_walk_and_embed_assets_no_css() {
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
@ -227,6 +263,41 @@ fn test_walk_and_embed_assets_no_images() {
/// With `opt_no_frames` enabled, the `src` of a `<frame>` is expected to be
/// emptied in the serialized output.
#[test]
fn test_walk_and_embed_assets_no_frames() {
    let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
    let dom = html_to_dom(html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    // Only the frame-exclusion switch is on.
    let opt_no_css = false;
    let opt_no_frames = true;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    let client = Client::new();

    walk_and_embed_assets(
        cache,
        &client,
        url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char`
    // would garble any multi-byte sequence in the output.
    let serialized = String::from_utf8(buf).expect("serialized DOM should be valid UTF-8");

    assert_eq!(
        serialized,
        "<html><head></head><frameset><frame src=\"\"></frameset></html>"
    );
}
#[test]
fn test_walk_and_embed_assets_no_iframes() {
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
let dom = html_to_dom(&html);
let url = "http://localhost";