treat frames the same way as iframes
This commit is contained in:
parent
00942e0b1d
commit
7654eec7e2
4 changed files with 81 additions and 10 deletions
|
@ -35,7 +35,7 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as
|
|||
|
||||
## Options
|
||||
- `-c`: Ignore styles
|
||||
- `-f`: Exclude iframes
|
||||
- `-f`: Exclude frames and iframes
|
||||
- `-i`: Remove images
|
||||
- `-I`: Isolate the document
|
||||
- `-j`: Exclude JavaScript
|
||||
|
|
|
@ -34,7 +34,7 @@ impl AppArgs {
|
|||
)
|
||||
// .args_from_usage("-a, --include-audio 'Removes audio sources'")
|
||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||
.args_from_usage("-f, --no-frames 'Removes iframes'")
|
||||
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||
.args_from_usage("-i, --no-images 'Removes images'")
|
||||
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||
|
|
16
src/html.rs
16
src/html.rs
|
@ -386,7 +386,7 @@ pub fn walk_and_embed_assets(
|
|||
}
|
||||
}
|
||||
}
|
||||
"iframe" => {
|
||||
"frame" | "iframe" => {
|
||||
for attr in attrs_mut.iter_mut() {
|
||||
if &attr.name.local == "src" {
|
||||
if opt_no_frames {
|
||||
|
@ -395,15 +395,15 @@ pub fn walk_and_embed_assets(
|
|||
continue;
|
||||
}
|
||||
|
||||
let iframe_src = attr.value.trim();
|
||||
let frame_src = attr.value.trim();
|
||||
|
||||
// Ignore iframes with empty source (they cause infinite loops)
|
||||
if iframe_src.is_empty() {
|
||||
// Ignore (i)frames with empty source — they cause infinite loops
|
||||
if frame_src.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let src_full_url = resolve_url(&url, iframe_src).unwrap_or_default();
|
||||
let (iframe_data, iframe_final_url) = retrieve_asset(
|
||||
let src_full_url = resolve_url(&url, frame_src).unwrap_or_default();
|
||||
let (frame_data, frame_final_url) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&src_full_url,
|
||||
|
@ -412,11 +412,11 @@ pub fn walk_and_embed_assets(
|
|||
opt_silent,
|
||||
)
|
||||
.unwrap_or((str!(), src_full_url));
|
||||
let dom = html_to_dom(&iframe_data);
|
||||
let dom = html_to_dom(&frame_data);
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
client,
|
||||
&iframe_final_url,
|
||||
&frame_final_url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_js,
|
||||
|
|
|
@ -132,6 +132,42 @@ fn test_walk_and_embed_assets_ensure_no_recursive_iframe() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Checks that a `<frame>` whose `src` is empty is left untouched by
/// `walk_and_embed_assets`: the serialized document must still contain
/// `<frame src="">`. This is the `<frame>` counterpart of the existing
/// recursive-iframe test.
#[test]
|
||||
fn test_walk_and_embed_assets_ensure_no_recursive_frame() {
|
||||
// A frame with an empty source; the name of this test suggests such a frame
// could otherwise lead to recursive processing.
let html = "<frameset><frame src=\"\"></frameset>";
|
||||
let dom = html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
// Frame processing stays enabled (opt_no_frames = false), so the frame branch is exercised.
let opt_no_css: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
// Flags are positional: no_css, no_js, no_images, silent, no_frames.
walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
opt_no_frames,
|
||||
);
|
||||
|
||||
// Serialize the DOM back to bytes so it can be compared as a string.
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
// The frame must come out unchanged, wrapped in the <html>/<head> scaffolding seen in the expected string.
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_walk_and_embed_assets_no_css() {
|
||||
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
|
||||
|
@ -227,6 +263,41 @@ fn test_walk_and_embed_assets_no_images() {
|
|||
|
||||
/// Checks that with `opt_no_frames` enabled, the `src` of a `<frame>` is
/// blanked: a frame pointing at an external URL must serialize as
/// `<frame src="">`.
#[test]
|
||||
fn test_walk_and_embed_assets_no_frames() {
|
||||
// The frame points at an external host; the expected output below shows this URL removed.
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
|
||||
let dom = html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
// Only the frame-exclusion flag is switched on; the other exclusion flags stay off.
let opt_no_css: bool = false;
|
||||
let opt_no_frames: bool = true;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_silent = true;
|
||||
let client = Client::new();
|
||||
|
||||
// Flags are positional: no_css, no_js, no_images, silent, no_frames.
walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
opt_no_frames,
|
||||
);
|
||||
|
||||
// Serialize the DOM back to bytes so it can be compared as a string.
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
// The <frame> element itself is kept; only its src attribute is emptied.
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_walk_and_embed_assets_no_iframes() {
|
||||
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
|
||||
let dom = html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
|
|
Loading…
Reference in a new issue