treat frames the same way as iframes
This commit is contained in:
parent
00942e0b1d
commit
7654eec7e2
4 changed files with 81 additions and 10 deletions
|
@ -35,7 +35,7 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as
|
||||||
|
|
||||||
## Options
|
## Options
|
||||||
- `-c`: Ignore styles
|
- `-c`: Ignore styles
|
||||||
- `-f`: Exclude iframes
|
- `-f`: Exclude frames and iframes
|
||||||
- `-i`: Remove images
|
- `-i`: Remove images
|
||||||
- `-I`: Isolate the document
|
- `-I`: Isolate the document
|
||||||
- `-j`: Exclude JavaScript
|
- `-j`: Exclude JavaScript
|
||||||
|
|
|
@ -34,7 +34,7 @@ impl AppArgs {
|
||||||
)
|
)
|
||||||
// .args_from_usage("-a, --include-audio 'Removes audio sources'")
|
// .args_from_usage("-a, --include-audio 'Removes audio sources'")
|
||||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||||
.args_from_usage("-f, --no-frames 'Removes iframes'")
|
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||||
.args_from_usage("-i, --no-images 'Removes images'")
|
.args_from_usage("-i, --no-images 'Removes images'")
|
||||||
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
||||||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||||
|
|
16
src/html.rs
16
src/html.rs
|
@ -386,7 +386,7 @@ pub fn walk_and_embed_assets(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"iframe" => {
|
"frame" | "iframe" => {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "src" {
|
if &attr.name.local == "src" {
|
||||||
if opt_no_frames {
|
if opt_no_frames {
|
||||||
|
@ -395,15 +395,15 @@ pub fn walk_and_embed_assets(
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let iframe_src = attr.value.trim();
|
let frame_src = attr.value.trim();
|
||||||
|
|
||||||
// Ignore iframes with empty source (they cause infinite loops)
|
// Ignore (i)frames with empty source — they cause infinite loops
|
||||||
if iframe_src.is_empty() {
|
if frame_src.is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let src_full_url = resolve_url(&url, iframe_src).unwrap_or_default();
|
let src_full_url = resolve_url(&url, frame_src).unwrap_or_default();
|
||||||
let (iframe_data, iframe_final_url) = retrieve_asset(
|
let (frame_data, frame_final_url) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
&src_full_url,
|
&src_full_url,
|
||||||
|
@ -412,11 +412,11 @@ pub fn walk_and_embed_assets(
|
||||||
opt_silent,
|
opt_silent,
|
||||||
)
|
)
|
||||||
.unwrap_or((str!(), src_full_url));
|
.unwrap_or((str!(), src_full_url));
|
||||||
let dom = html_to_dom(&iframe_data);
|
let dom = html_to_dom(&frame_data);
|
||||||
walk_and_embed_assets(
|
walk_and_embed_assets(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
&iframe_final_url,
|
&frame_final_url,
|
||||||
&dom.document,
|
&dom.document,
|
||||||
opt_no_css,
|
opt_no_css,
|
||||||
opt_no_js,
|
opt_no_js,
|
||||||
|
|
|
@ -132,6 +132,42 @@ fn test_walk_and_embed_assets_ensure_no_recursive_iframe() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_walk_and_embed_assets_ensure_no_recursive_frame() {
|
||||||
|
let html = "<frameset><frame src=\"\"></frameset>";
|
||||||
|
let dom = html_to_dom(&html);
|
||||||
|
let url = "http://localhost";
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let opt_no_css: bool = false;
|
||||||
|
let opt_no_frames: bool = false;
|
||||||
|
let opt_no_js: bool = false;
|
||||||
|
let opt_no_images: bool = false;
|
||||||
|
let opt_silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
walk_and_embed_assets(
|
||||||
|
cache,
|
||||||
|
&client,
|
||||||
|
&url,
|
||||||
|
&dom.document,
|
||||||
|
opt_no_css,
|
||||||
|
opt_no_js,
|
||||||
|
opt_no_images,
|
||||||
|
opt_silent,
|
||||||
|
opt_no_frames,
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_walk_and_embed_assets_no_css() {
|
fn test_walk_and_embed_assets_no_css() {
|
||||||
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
|
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
|
||||||
|
@ -227,6 +263,41 @@ fn test_walk_and_embed_assets_no_images() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_walk_and_embed_assets_no_frames() {
|
fn test_walk_and_embed_assets_no_frames() {
|
||||||
|
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
|
||||||
|
let dom = html_to_dom(&html);
|
||||||
|
let url = "http://localhost";
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let opt_no_css: bool = false;
|
||||||
|
let opt_no_frames: bool = true;
|
||||||
|
let opt_no_js: bool = false;
|
||||||
|
let opt_no_images: bool = false;
|
||||||
|
let opt_silent = true;
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
walk_and_embed_assets(
|
||||||
|
cache,
|
||||||
|
&client,
|
||||||
|
&url,
|
||||||
|
&dom.document,
|
||||||
|
opt_no_css,
|
||||||
|
opt_no_js,
|
||||||
|
opt_no_images,
|
||||||
|
opt_silent,
|
||||||
|
opt_no_frames,
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_walk_and_embed_assets_no_iframes() {
|
||||||
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
|
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
|
||||||
let dom = html_to_dom(&html);
|
let dom = html_to_dom(&html);
|
||||||
let url = "http://localhost";
|
let url = "http://localhost";
|
||||||
|
|
Loading…
Reference in a new issue