Merge pull request #253 from snshn/unwrap-noscript
Make possible to unwrap NOSCRIPT nodes
This commit is contained in:
commit
8256d17efd
10 changed files with 190 additions and 6 deletions
|
@ -79,6 +79,7 @@ or
|
|||
- `-j`: Exclude JavaScript
|
||||
- `-k`: Accept invalid X.509 (TLS) certificates
|
||||
- `-M`: Don't add timestamp and URL information
|
||||
- `-n`: Extract contents of NOSCRIPT tags
|
||||
- `-o`: Write output to `file`
|
||||
- `-s`: Be quiet
|
||||
- `-t`: Adjust `network request timeout`
|
||||
|
|
13
src/html.rs
13
src/html.rs
|
@ -474,6 +474,11 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
|
|||
result = String::from_utf8(buf).unwrap();
|
||||
}
|
||||
|
||||
if options.unwrap_noscript {
|
||||
let noscript_re = Regex::new(r"<(?P<c>/?noscript)>").unwrap();
|
||||
result = noscript_re.replace_all(&result, "<!--$c-->").to_string();
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
|
@ -1060,11 +1065,11 @@ pub fn walk_and_embed_assets(
|
|||
for child_node in node.children.borrow_mut().iter_mut() {
|
||||
match child_node.data {
|
||||
NodeData::Text { ref contents } => {
|
||||
// Get contents of the NOSCRIPT node
|
||||
// Get contents of NOSCRIPT node
|
||||
let mut noscript_contents = contents.borrow_mut();
|
||||
// Parse contents of the NOSCRIPT node
|
||||
// Parse contents of NOSCRIPT node as DOM
|
||||
let noscript_contents_dom: RcDom = html_to_dom(&noscript_contents);
|
||||
// Embed assets within the NOSCRIPT node
|
||||
// Embed assets of NOSCRIPT node contents
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
client,
|
||||
|
@ -1075,7 +1080,7 @@ pub fn walk_and_embed_assets(
|
|||
);
|
||||
// Get rid of original contents
|
||||
noscript_contents.clear();
|
||||
// Insert HTML containing embedded assets into the NOSCRIPT node
|
||||
// Insert HTML containing embedded assets back into NOSCRIPT node
|
||||
if let Some(html) =
|
||||
get_child_node_by_name(&noscript_contents_dom.document, "html")
|
||||
{
|
||||
|
|
|
@ -21,6 +21,7 @@ pub struct Options {
|
|||
pub no_video: bool,
|
||||
pub target: String,
|
||||
pub no_color: bool,
|
||||
pub unwrap_noscript: bool,
|
||||
}
|
||||
|
||||
const ASCII: &'static str = " \
|
||||
|
@ -55,6 +56,9 @@ impl Options {
|
|||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
|
||||
.args_from_usage(
|
||||
"-n, --unwrap-noscript 'Replaces NOSCRIPT elements with their contents'",
|
||||
)
|
||||
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
|
||||
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
|
||||
|
@ -100,6 +104,7 @@ impl Options {
|
|||
} else {
|
||||
options.user_agent = Some(DEFAULT_USER_AGENT.to_string());
|
||||
}
|
||||
options.unwrap_noscript = app.is_present("unwrap-noscript");
|
||||
options.no_video = app.is_present("no-video");
|
||||
|
||||
options.no_color =
|
||||
|
|
|
@ -202,12 +202,12 @@ mod passing {
|
|||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_css}\n\
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
|
||||
.unwrap()
|
||||
.into_string(),
|
||||
file_url_css = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
|
||||
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
|
||||
.unwrap()
|
||||
.into_string(),
|
||||
)
|
||||
|
|
|
@ -2,4 +2,5 @@ mod base_url;
|
|||
mod basic;
|
||||
mod data_url;
|
||||
mod local_files;
|
||||
mod noscript;
|
||||
mod unusual_encodings;
|
||||
|
|
164
src/tests/cli/noscript.rs
Normal file
164
src/tests/cli/noscript.rs
Normal file
|
@ -0,0 +1,164 @@
|
|||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use assert_cmd::prelude::*;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use url::Url;
|
||||
|
||||
#[test]
|
||||
fn parse_noscript_contents() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let path_html: &Path = Path::new("src/tests/data/noscript/index.html");
|
||||
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
|
||||
|
||||
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
|
||||
|
||||
// STDOUT should contain HTML with the NOSCRIPT element left in place
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head></head><body><noscript><img src=\"\"></noscript>\n</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should contain target HTML and embedded SVG files
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
|
||||
.unwrap()
|
||||
.into_string(),
|
||||
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
|
||||
.unwrap()
|
||||
.into_string(),
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unwrap_noscript_contents() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let path_html: &Path = Path::new("src/tests/data/noscript/index.html");
|
||||
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
|
||||
|
||||
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
|
||||
|
||||
// STDOUT should contain HTML with the NOSCRIPT tags turned into comments
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head></head><body><!--noscript--><img src=\"\"><!--/noscript-->\n</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should contain target HTML and embedded SVG files
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
|
||||
.unwrap()
|
||||
.into_string(),
|
||||
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
|
||||
.unwrap()
|
||||
.into_string(),
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unwrap_noscript_contents_nested() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let path_html: &Path = Path::new("src/tests/data/noscript/nested.html");
|
||||
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
|
||||
|
||||
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
|
||||
|
||||
// STDOUT should contain HTML with both nested NOSCRIPT tag pairs turned into comments
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head></head><body><!--noscript--><h1>JS is not active</h1><!--noscript--><img src=\"\"><!--/noscript--><!--/noscript-->\n</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should contain target HTML and embedded SVG files
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
|
||||
.unwrap()
|
||||
.into_string(),
|
||||
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
|
||||
.unwrap()
|
||||
.into_string(),
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unwrap_noscript_contents_with_script() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let path_html: &Path = Path::new("src/tests/data/noscript/script.html");
|
||||
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
|
||||
|
||||
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
|
||||
|
||||
// STDOUT should contain HTML with the NOSCRIPT tags turned into comments and no SCRIPT element
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head></head><body><!--noscript--><img src=\"\"><!--/noscript-->\n</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should contain target HTML and embedded SVG files
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
|
||||
.unwrap()
|
||||
.into_string(),
|
||||
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
|
||||
.unwrap()
|
||||
.into_string(),
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
5
src/tests/data/noscript/image.svg
Normal file
5
src/tests/data/noscript/image.svg
Normal file
|
@ -0,0 +1,5 @@
|
|||
<svg version="1.1" baseProfile="full" width="300" height="200" xmlns="http://www.w3.org/2000/svg">
|
||||
<rect width="100%" height="100%" fill="red" />
|
||||
<circle cx="150" cy="100" r="80" fill="green" />
|
||||
<text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text>
|
||||
</svg>
|
After Width: | Height: | Size: 296 B |
1
src/tests/data/noscript/index.html
Normal file
1
src/tests/data/noscript/index.html
Normal file
|
@ -0,0 +1 @@
|
|||
<body><noscript><img src="image.svg" /></noscript></body>
|
1
src/tests/data/noscript/nested.html
Normal file
1
src/tests/data/noscript/nested.html
Normal file
|
@ -0,0 +1 @@
|
|||
<body><noscript><h1>JS is not active</h1><noscript><img src="image.svg" /></noscript></noscript></body>
|
1
src/tests/data/noscript/script.html
Normal file
1
src/tests/data/noscript/script.html
Normal file
|
@ -0,0 +1 @@
|
|||
<body><noscript><script>alert(1);</script><img src="image.svg" /></noscript></body>
|
Loading…
Reference in a new issue