fix srcset parsing

This commit is contained in:
Sunshine 2020-12-25 21:54:52 -10:00
parent 0533b287b7
commit 614a518475
No known key found for this signature in database
GPG key ID: B80CA68703CD8AB1
4 changed files with 81 additions and 5 deletions

37
Cargo.lock generated
View file

@ -5,6 +5,14 @@ name = "adler"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "aho-corasick"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ansi_term"
version = "0.11.0"
@ -634,6 +642,7 @@ dependencies = [
"clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)",
"cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)",
"html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.10.9 (registry+https://github.com/rust-lang/crates.io-index)",
"sha2 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1064,6 +1073,22 @@ name = "redox_syscall"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "regex"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.21 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.6.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "remove_dir_all"
version = "0.5.3"
@ -1304,6 +1329,14 @@ dependencies = [
"unicode-width 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thread_local"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "time"
version = "0.1.43"
@ -1623,6 +1656,7 @@ dependencies = [
[metadata]
"checksum adler 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"
"checksum aho-corasick 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum assert_cmd 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c88b9ca26f9c16ec830350d309397e74ee9abdfd8eb1f71cb6ecc71a3fc818da"
"checksum async-compression 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "9021768bcce77296b64648cc7a7460e3df99979b97ed5c925c38d1cc83778d98"
@ -1749,6 +1783,8 @@ dependencies = [
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
"checksum redox_syscall 0.1.57 (registry+https://github.com/rust-lang/crates.io-index)" = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
"checksum regex 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c"
"checksum regex-syntax 0.6.21 (registry+https://github.com/rust-lang/crates.io-index)" = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189"
"checksum remove_dir_all 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
"checksum reqwest 0.10.9 (registry+https://github.com/rust-lang/crates.io-index)" = "fb15d6255c792356a0f578d8a645c677904dc02e862bebe2ecc18e0c01b9a0ce"
"checksum ryu 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
@ -1774,6 +1810,7 @@ dependencies = [
"checksum tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9"
"checksum tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b"
"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
"checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
"checksum time 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
"checksum tinyvec 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "53953d2d3a5ad81d9f844a32f14ebb121f50b650cd59d0ee2a07cf13c617efed"
"checksum tokio 0.2.22 (registry+https://github.com/rust-lang/crates.io-index)" = "5d34ca54d84bf2b5b4d7d31e901a8464f7b60ac145a284fba25ceb801f2ddccd"

View file

@ -17,6 +17,7 @@ chrono = "0.4.19" # Used for formatting creation timestamp
clap = "2.33.3"
cssparser = "0.27.2"
html5ever = "0.24.1"
regex = "1.4.2" # Used for parsing srcset
sha2 = "0.9.2" # Used for calculating checksums during integrity checks
url = "2.2.0"

View file

@ -7,6 +7,7 @@ use html5ever::serialize::{serialize, SerializeOpts};
use html5ever::tendril::{format_tendril, TendrilSink};
use html5ever::tree_builder::{Attribute, TreeSink};
use html5ever::{local_name, namespace_url, ns, LocalName};
use regex::Regex;
use reqwest::blocking::Client;
use reqwest::Url;
use sha2::{Digest, Sha256, Sha384, Sha512};
@ -156,8 +157,8 @@ pub fn embed_srcset(
depth: u32,
) -> String {
let mut array: Vec<SrcSetItem> = vec![];
let srcset_items: Vec<&str> = srcset.split(',').collect();
for srcset_item in srcset_items {
let re = Regex::new(r",\s+").unwrap();
for srcset_item in re.split(srcset) {
let parts: Vec<&str> = srcset_item.trim().split_whitespace().collect();
if parts.len() > 0 {
let path = parts[0].trim();

View file

@ -24,8 +24,45 @@ mod passing {
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
assert_eq!(
embedded_css,
format!("{} 1x, {} 2x", empty_image!(), empty_image!()),
embedded_css
);
}
#[test]
fn commas_within_file_names() {
    // The file names contain commas themselves; the two candidates are
    // separated by a comma that is followed by whitespace.
    let srcset = "small,s.png 1x, large,l.png 2x";

    // The expected output below uses the empty-image placeholder for both
    // candidates while `no_images` is set.
    let mut options = Options::default();
    options.silent = true;
    options.no_images = true;

    let client = Client::new();
    let cache = &mut HashMap::new();

    let expected = format!("{} 1x, {} 2x", empty_image!(), empty_image!());
    let actual = html::embed_srcset(cache, &client, "", srcset, &options, 0);

    assert_eq!(actual, expected);
}
#[test]
fn tabs_and_newlines_after_commas() {
    let cache = &mut HashMap::new();
    let client = Client::new();
    // Exercise both separators named by this test: the first candidate is
    // followed by ",<TAB>" and the second by ",<NEWLINE>". File names keep
    // their embedded commas (no whitespace after them), so they must not be
    // treated as candidate separators.
    let srcset_value = "small,s.png 1x,\tmedium,m.png 2x,\nlarge,l.png 3x";
    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    let embedded_srcset = html::embed_srcset(cache, &client, "", srcset_value, &options, 0);

    // Whatever whitespace followed the commas in the input, the expected
    // output joins the candidates with ", ".
    assert_eq!(
        embedded_srcset,
        format!(
            "{} 1x, {} 2x, {} 3x",
            empty_image!(),
            empty_image!(),
            empty_image!()
        ),
    );
}
}
@ -56,8 +93,8 @@ mod failing {
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
assert_eq!(
format!("{} 1x, {} 2x", empty_image!(), empty_image!()),
embedded_css
embedded_css,
format!("{} 1x, {} 2x,", empty_image!(), empty_image!()),
);
}
}