Merge pull request #173 from snshn/sha2-integrity

Add asset integrity validation
This commit is contained in:
Sunshine 2020-05-12 03:15:02 -04:00 committed by GitHub
commit ac4945ca97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 924 additions and 582 deletions

84
Cargo.lock generated
View File

@ -71,11 +71,40 @@ name = "bitflags"
version = "1.2.1" version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "block-buffer"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
"generic-array 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "block-padding"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "bumpalo" name = "bumpalo"
version = "3.1.1" version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byte-tools"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "bytes" name = "bytes"
version = "0.5.3" version = "0.5.3"
@ -173,6 +202,14 @@ name = "difference"
version = "2.0.0" version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "digest"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"generic-array 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "doc-comment" name = "doc-comment"
version = "0.3.1" version = "0.3.1"
@ -210,6 +247,11 @@ dependencies = [
"serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "fake-simd"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "flate2" name = "flate2"
version = "1.0.13" version = "1.0.13"
@ -308,6 +350,14 @@ dependencies = [
"slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "generic-array"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"typenum 1.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.1.13" version = "0.1.13"
@ -573,7 +623,7 @@ dependencies = [
[[package]] [[package]]
name = "monolith" name = "monolith"
version = "2.2.4" version = "2.2.5"
dependencies = [ dependencies = [
"assert_cmd 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)", "assert_cmd 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
"base64 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)", "base64 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -581,6 +631,7 @@ dependencies = [
"cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)", "cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)",
"html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)", "html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
"sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -636,6 +687,11 @@ dependencies = [
"libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "opaque-debug"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "openssl" name = "openssl"
version = "0.10.26" version = "0.10.26"
@ -1102,6 +1158,17 @@ dependencies = [
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "sha2"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"opaque-debug 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "siphasher" name = "siphasher"
version = "0.2.3" version = "0.2.3"
@ -1267,6 +1334,11 @@ name = "try-lock"
version = "0.2.2" version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "typenum"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "unicase" name = "unicase"
version = "2.6.0" version = "2.6.0"
@ -1509,7 +1581,11 @@ dependencies = [
"checksum base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7" "checksum base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7"
"checksum base64 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7d5ca2cd0adc3f48f9e9ea5a6bbdf9ccc0bfade884847e484d452414c7ccffb3" "checksum base64 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7d5ca2cd0adc3f48f9e9ea5a6bbdf9ccc0bfade884847e484d452414c7ccffb3"
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
"checksum block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b"
"checksum block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5"
"checksum bumpalo 3.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8fe2567a8d8a3aedb4e39aa39e186d5673acfd56393c6ac83b2bc5bd82f4369c" "checksum bumpalo 3.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8fe2567a8d8a3aedb4e39aa39e186d5673acfd56393c6ac83b2bc5bd82f4369c"
"checksum byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
"checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
"checksum bytes 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "10004c15deb332055f7a4a208190aed362cf9a7c2f6ab70a305fba50e1105f38" "checksum bytes 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "10004c15deb332055f7a4a208190aed362cf9a7c2f6ab70a305fba50e1105f38"
"checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb"
"checksum cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)" = "aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8" "checksum cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)" = "aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8"
@ -1522,11 +1598,13 @@ dependencies = [
"checksum cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)" = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" "checksum cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)" = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a"
"checksum cssparser-macros 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" "checksum cssparser-macros 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" "checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
"checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
"checksum doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97" "checksum doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97"
"checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e" "checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e"
"checksum dtoa-short 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "59020b8513b76630c49d918c33db9f4c91638e7d3404a28084083b87e33f76f2" "checksum dtoa-short 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "59020b8513b76630c49d918c33db9f4c91638e7d3404a28084083b87e33f76f2"
"checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = "87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9" "checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = "87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9"
"checksum escargot 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "74cf96bec282dcdb07099f7e31d9fed323bca9435a09aba7b6d99b7617bca96d" "checksum escargot 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "74cf96bec282dcdb07099f7e31d9fed323bca9435a09aba7b6d99b7617bca96d"
"checksum fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
"checksum flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6bd6d6f4752952feb71363cffc9ebac9411b75b87c6ab6058c40c8900cf43c0f" "checksum flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6bd6d6f4752952feb71363cffc9ebac9411b75b87c6ab6058c40c8900cf43c0f"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" "checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
@ -1541,6 +1619,7 @@ dependencies = [
"checksum futures-sink 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "171be33efae63c2d59e6dbba34186fe0d6394fb378069a76dfd80fdcffd43c16" "checksum futures-sink 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "171be33efae63c2d59e6dbba34186fe0d6394fb378069a76dfd80fdcffd43c16"
"checksum futures-task 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0bae52d6b29cf440e298856fec3965ee6fa71b06aa7495178615953fd669e5f9" "checksum futures-task 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0bae52d6b29cf440e298856fec3965ee6fa71b06aa7495178615953fd669e5f9"
"checksum futures-util 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c0d66274fb76985d3c62c886d1da7ac4c0903a8c9f754e8fe0f35a6a6cc39e76" "checksum futures-util 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c0d66274fb76985d3c62c886d1da7ac4c0903a8c9f754e8fe0f35a6a6cc39e76"
"checksum generic-array 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c68f0274ae0e023facc3c97b2e00f076be70e254bc851d972503b328db79b2ec"
"checksum getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "e7db7ca94ed4cd01190ceee0d8a8052f08a247aa1b469a7f68c6a3b71afcf407" "checksum getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "e7db7ca94ed4cd01190ceee0d8a8052f08a247aa1b469a7f68c6a3b71afcf407"
"checksum h2 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b9433d71e471c1736fd5a61b671fc0b148d7a2992f666c958d03cd8feb3b88d1" "checksum h2 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b9433d71e471c1736fd5a61b671fc0b148d7a2992f666c958d03cd8feb3b88d1"
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" "checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
@ -1574,6 +1653,7 @@ dependencies = [
"checksum new_debug_unreachable 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f40f005c60db6e03bae699e414c58bf9aa7ea02a2d0b9bfbcf19286cc4c82b30" "checksum new_debug_unreachable 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f40f005c60db6e03bae699e414c58bf9aa7ea02a2d0b9bfbcf19286cc4c82b30"
"checksum nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" "checksum nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6"
"checksum num_cpus 1.11.1 (registry+https://github.com/rust-lang/crates.io-index)" = "76dac5ed2a876980778b8b85f75a71b6cbf0db0b1232ee12f826bccb00d09d72" "checksum num_cpus 1.11.1 (registry+https://github.com/rust-lang/crates.io-index)" = "76dac5ed2a876980778b8b85f75a71b6cbf0db0b1232ee12f826bccb00d09d72"
"checksum opaque-debug 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
"checksum openssl 0.10.26 (registry+https://github.com/rust-lang/crates.io-index)" = "3a3cc5799d98e1088141b8e01ff760112bbd9f19d850c124500566ca6901a585" "checksum openssl 0.10.26 (registry+https://github.com/rust-lang/crates.io-index)" = "3a3cc5799d98e1088141b8e01ff760112bbd9f19d850c124500566ca6901a585"
"checksum openssl-probe 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de" "checksum openssl-probe 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de"
"checksum openssl-sys 0.9.53 (registry+https://github.com/rust-lang/crates.io-index)" = "465d16ae7fc0e313318f7de5cecf57b2fbe7511fd213978b457e1c96ff46736f" "checksum openssl-sys 0.9.53 (registry+https://github.com/rust-lang/crates.io-index)" = "465d16ae7fc0e313318f7de5cecf57b2fbe7511fd213978b457e1c96ff46736f"
@ -1626,6 +1706,7 @@ dependencies = [
"checksum serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)" = "a8c6faef9a2e64b0064f48570289b4bf8823b7581f1d6157c1b52152306651d0" "checksum serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)" = "a8c6faef9a2e64b0064f48570289b4bf8823b7581f1d6157c1b52152306651d0"
"checksum serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)" = "1a3351dcbc1f067e2c92ab7c3c1f288ad1a4cffc470b5aaddb4c2e0a3ae80043" "checksum serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)" = "1a3351dcbc1f067e2c92ab7c3c1f288ad1a4cffc470b5aaddb4c2e0a3ae80043"
"checksum serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9ec5d77e2d4c73717816afac02670d5c4f534ea95ed430442cad02e7a6e32c97" "checksum serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9ec5d77e2d4c73717816afac02670d5c4f534ea95ed430442cad02e7a6e32c97"
"checksum sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "27044adfd2e1f077f649f59deb9490d3941d674002f7d062870a60ebe9bd47a0"
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac" "checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
"checksum siphasher 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8e88f89a550c01e4cd809f3df4f52dc9e939f3273a2017eabd5c6d12fd98bb23" "checksum siphasher 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8e88f89a550c01e4cd809f3df4f52dc9e939f3273a2017eabd5c6d12fd98bb23"
"checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" "checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
@ -1646,6 +1727,7 @@ dependencies = [
"checksum tower-service 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e987b6bf443f4b5b3b6f38704195592cca41c5bb7aedd3c3693c7081f8289860" "checksum tower-service 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e987b6bf443f4b5b3b6f38704195592cca41c5bb7aedd3c3693c7081f8289860"
"checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41" "checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
"checksum try-lock 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e604eb7b43c06650e854be16a2a03155743d3752dd1c943f6829e26b7a36e382" "checksum try-lock 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e604eb7b43c06650e854be16a2a03155743d3752dd1c943f6829e26b7a36e382"
"checksum typenum 1.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33"
"checksum unicase 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" "checksum unicase 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" "checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
"checksum unicode-normalization 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf" "checksum unicode-normalization 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf"

View File

@ -1,6 +1,6 @@
[package] [package]
name = "monolith" name = "monolith"
version = "2.2.4" version = "2.2.5"
edition = "2018" edition = "2018"
authors = [ authors = [
"Sunshine <sunshine@uberspace.net>", "Sunshine <sunshine@uberspace.net>",
@ -16,12 +16,10 @@ base64 = "0.12.0"
clap = "2.33.0" clap = "2.33.0"
cssparser = "0.27.2" cssparser = "0.27.2"
html5ever = "0.24.1" html5ever = "0.24.1"
sha2 = "0.8.1" # Used in calculating checksums during integrity checks
time = "0.1.42" # Used to render comments indicating the time the page was saved
url = "2.1.1" url = "2.1.1"
# Used to render comments indicating the time the page was saved
# also required by reqwest as of v0.10.0
time = "0.1.42"
[dependencies.reqwest] [dependencies.reqwest]
version = "0.10.*" version = "0.10.*"
default-features = false default-features = false

View File

@ -2,7 +2,7 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
use reqwest::blocking::Client; use reqwest::blocking::Client;
use std::collections::HashMap; use std::collections::HashMap;
use crate::utils::{data_to_data_url, get_url_fragment, resolve_url, retrieve_asset}; use crate::utils::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset};
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[ const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
// Universal // Universal
@ -169,19 +169,10 @@ pub fn process_css<'a>(
continue; continue;
} }
let full_url = resolve_url(&parent_url, value).unwrap_or_default(); let import_full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone()); let import_url_fragment = get_url_fragment(import_full_url.clone());
let (css, final_url) = retrieve_asset( match retrieve_asset(cache, client, &parent_url, &import_full_url, opt_silent) {
cache, Ok((import_contents, import_final_url, _import_media_type)) => {
client,
&parent_url,
&full_url,
false,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str( result.push_str(
enquote( enquote(
data_to_data_url( data_to_data_url(
@ -189,20 +180,28 @@ pub fn process_css<'a>(
embed_css( embed_css(
cache, cache,
client, client,
final_url.as_str(), &import_final_url,
&css, &String::from_utf8_lossy(&import_contents),
opt_no_fonts, opt_no_fonts,
opt_no_images, opt_no_images,
opt_silent, opt_silent,
) )
.as_bytes(), .as_bytes(),
&final_url, &import_final_url,
url_fragment.as_str(), &import_url_fragment,
), ),
false, false,
) )
.as_str(), .as_str(),
); );
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(import_full_url.clone()) {
result.push_str(enquote(import_full_url, false).as_str());
}
}
}
} else { } else {
if func_name == "url" { if func_name == "url" {
// Skip empty url()'s // Skip empty url()'s
@ -214,18 +213,31 @@ pub fn process_css<'a>(
result.push_str(enquote(str!(empty_image!()), false).as_str()); result.push_str(enquote(str!(empty_image!()), false).as_str());
} else { } else {
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default(); let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
let (data_url, _final_url) = retrieve_asset( let url_fragment = get_url_fragment(resolved_url.clone());
match retrieve_asset(
cache, cache,
client, client,
&parent_url, &parent_url,
&resolved_url, &resolved_url,
true,
"",
opt_silent, opt_silent,
) ) {
.unwrap_or_default(); Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(
&media_type,
&data,
&final_url,
&url_fragment,
);
result.push_str(enquote(data_url, false).as_str()); result.push_str(enquote(data_url, false).as_str());
} }
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(resolved_url.clone()) {
result.push_str(enquote(resolved_url, false).as_str());
}
}
}
}
} else { } else {
result.push_str(enquote(str!(value), false).as_str()); result.push_str(enquote(str!(value), false).as_str());
} }
@ -293,55 +305,52 @@ pub fn process_css<'a>(
if is_import { if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default(); let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone()); let url_fragment = get_url_fragment(full_url.clone());
let (css, final_url) = retrieve_asset( match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
cache, Ok((css, final_url, _media_type)) => {
client, let data_url = data_to_data_url(
&parent_url,
&full_url,
false,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(
enquote(
data_to_data_url(
"text/css", "text/css",
embed_css( embed_css(
cache, cache,
client, client,
final_url.as_str(), &final_url,
&css, &String::from_utf8_lossy(&css),
opt_no_fonts, opt_no_fonts,
opt_no_images, opt_no_images,
opt_silent, opt_silent,
) )
.as_bytes(), .as_bytes(),
&final_url, &final_url,
url_fragment.as_str(), &url_fragment,
),
false,
)
.as_str(),
); );
result.push_str(enquote(data_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
result.push_str(enquote(full_url, false).as_str());
}
}
}
} else { } else {
if opt_no_images && is_image_url_prop(curr_prop.as_str()) { if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str()); result.push_str(enquote(str!(empty_image!()), false).as_str());
} else { } else {
let full_url = resolve_url(&parent_url, value).unwrap_or_default(); let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let (data_url, _final_url) = retrieve_asset( let url_fragment = get_url_fragment(full_url.clone());
cache, match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
client, Ok((data, final_url, media_type)) => {
&parent_url, let data_url =
&full_url, data_to_data_url(&media_type, &data, &final_url, &url_fragment);
true,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(enquote(data_url, false).as_str()); result.push_str(enquote(data_url, false).as_str());
} }
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
result.push_str(enquote(full_url, false).as_str());
}
}
}
}
} }
result.push_str(")"); result.push_str(")");
} }

View File

@ -1,6 +1,9 @@
use crate::css::embed_css; use crate::css::embed_css;
use crate::js::attr_is_event_handler; use crate::js::attr_is_event_handler;
use crate::utils::{data_to_data_url, is_http_url, resolve_url, retrieve_asset, url_has_protocol}; use crate::utils::{
data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset, url_has_protocol,
};
use base64;
use html5ever::interface::QualName; use html5ever::interface::QualName;
use html5ever::parse_document; use html5ever::parse_document;
use html5ever::rcdom::{Handle, NodeData, RcDom}; use html5ever::rcdom::{Handle, NodeData, RcDom};
@ -9,6 +12,7 @@ use html5ever::tendril::{format_tendril, Tendril, TendrilSink};
use html5ever::tree_builder::{Attribute, TreeSink}; use html5ever::tree_builder::{Attribute, TreeSink};
use html5ever::{local_name, namespace_url, ns}; use html5ever::{local_name, namespace_url, ns};
use reqwest::blocking::Client; use reqwest::blocking::Client;
use sha2::{Digest, Sha256, Sha384, Sha512};
use std::collections::HashMap; use std::collections::HashMap;
use std::default::Default; use std::default::Default;
@ -36,6 +40,24 @@ pub fn is_icon(attr_value: &str) -> bool {
ICON_VALUES.contains(&attr_value.to_lowercase().as_str()) ICON_VALUES.contains(&attr_value.to_lowercase().as_str())
} }
/// Checks `data` against the value of an `integrity` (Subresource Integrity) attribute.
///
/// `integrity` may contain one or more whitespace-separated expressions of the form
/// `<algorithm>-<base64 digest>`, each optionally followed by `?options`.
/// As described by the SRI specification, only the expressions that use the strongest
/// algorithm present (sha512 > sha384 > sha256) are taken into account, and the data
/// is accepted as soon as any one of them matches.
///
/// Returns `false` if the digest does not match, or if none of the expressions uses
/// a supported algorithm (this includes an empty `integrity` string).
pub fn has_proper_integrity(data: &[u8], integrity: &str) -> bool {
    // Rank of the algorithm used by a single expression; 0 means "unsupported"
    fn strength(expression: &str) -> u8 {
        if expression.starts_with("sha512-") {
            3
        } else if expression.starts_with("sha384-") {
            2
        } else if expression.starts_with("sha256-") {
            1
        } else {
            0
        }
    }

    // Weaker hashes are ignored whenever a stronger one is listed
    let strongest: u8 = integrity
        .split_whitespace()
        .map(strength)
        .max()
        .unwrap_or(0);

    // Hash the data only once, using the selected algorithm
    let actual_digest: String = match strongest {
        3 => {
            let mut hasher = Sha512::new();
            hasher.input(data);
            base64::encode(hasher.result())
        }
        2 => {
            let mut hasher = Sha384::new();
            hasher.input(data);
            base64::encode(hasher.result())
        }
        1 => {
            let mut hasher = Sha256::new();
            hasher.input(data);
            base64::encode(hasher.result())
        }
        _ => return false,
    };

    integrity
        .split_whitespace()
        .filter(|expression| strength(expression) == strongest)
        .any(|expression| {
            // Every supported prefix ("sha256-", "sha384-", "sha512-") is 7 bytes long,
            // and strength() has already verified that one of them is present
            let expected = &expression[7..];
            // Options (anything after '?') are not part of the digest
            let expected = expected.split('?').next().unwrap_or(expected);
            actual_digest == expected
        })
}
pub fn walk_and_embed_assets( pub fn walk_and_embed_assets(
cache: &mut HashMap<String, Vec<u8>>, cache: &mut HashMap<String, Vec<u8>>,
client: &Client, client: &Client,
@ -75,12 +97,13 @@ pub fn walk_and_embed_assets(
match name.local.as_ref() { match name.local.as_ref() {
"link" => { "link" => {
// Remove integrity attributes // Remove integrity attributes, keep value of the last one
let mut integrity: String = str!();
let mut i = 0; let mut i = 0;
while i < attrs_mut.len() { while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref(); let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("integrity") { if attr_name.eq_ignore_ascii_case("integrity") {
attrs_mut.remove(i); integrity = str!(attrs_mut.remove(i).value.trim());
} else { } else {
i += 1; i += 1;
} }
@ -117,93 +140,166 @@ pub fn walk_and_embed_assets(
match link_type { match link_type {
LinkType::Icon => { LinkType::Icon => {
for attr in attrs_mut.iter_mut() { // Find and remove href attribute(s), keep value of the last found one
if &attr.name.local == "href" { let mut link_href: String = str!();
if opt_no_images { let mut i = 0;
attr.value.clear(); while i < attrs_mut.len() {
let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("href") {
link_href = str!(attrs_mut.remove(i).value.trim());
} else { } else {
let href_full_url = resolve_url(&url, attr.value.as_ref()) i += 1;
.unwrap_or_default(); }
let (favicon_data_url, _) = retrieve_asset( }
if !opt_no_images && !link_href.is_empty() {
let link_href_full_url =
resolve_url(&url, link_href).unwrap_or_default();
let link_href_url_fragment =
get_url_fragment(link_href_full_url.clone());
match retrieve_asset(
cache, cache,
client, client,
&url, &url,
&href_full_url, &link_href_full_url,
true,
"",
opt_silent, opt_silent,
) ) {
.unwrap_or_default(); Ok((
attr.value.clear(); link_href_data,
attr.value.push_slice(favicon_data_url.as_str()); link_href_final_url,
link_href_media_type,
)) => {
// Check integrity
if integrity.is_empty()
|| has_proper_integrity(&link_href_data, &integrity)
{
let link_href_data_url = data_to_data_url(
&link_href_media_type,
&link_href_data,
&link_href_final_url,
&link_href_url_fragment,
);
// Add new data URL href attribute
attrs_mut.push(Attribute {
name: QualName::new(
None,
ns!(),
local_name!("href"),
),
value: Tendril::from_slice(
link_href_data_url.as_ref(),
),
});
}
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(link_href_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(
None,
ns!(),
local_name!("href"),
),
value: Tendril::from_slice(
link_href_full_url.as_ref(),
),
});
}
} }
} }
} }
} }
LinkType::Stylesheet => { LinkType::Stylesheet => {
for attr in attrs_mut.iter_mut() { // Find and remove href attribute(s), keep value of the last found one
if &attr.name.local == "href" { let mut link_href: String = str!();
if opt_no_css { let mut i = 0;
attr.value.clear(); while i < attrs_mut.len() {
let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("href") {
link_href = str!(attrs_mut.remove(i).value.trim());
} else { } else {
let href_full_url = resolve_url(&url, &attr.value.as_ref()) i += 1;
.unwrap_or_default(); }
let replacement_text = match retrieve_asset( }
if !opt_no_css && !link_href.is_empty() {
let link_href_full_url =
resolve_url(&url, link_href).unwrap_or_default();
match retrieve_asset(
cache, cache,
client, client,
&url, &url,
&href_full_url, &link_href_full_url,
false,
"text/css",
opt_silent, opt_silent,
) { ) {
// On successful retrieval, traverse CSS Ok((
Ok((css_data, final_url)) => { link_href_data,
link_href_final_url,
_link_href_media_type,
)) => {
// Check integrity
if integrity.is_empty()
|| has_proper_integrity(&link_href_data, &integrity)
{
let css: String = embed_css( let css: String = embed_css(
cache, cache,
client, client,
&final_url, &link_href_final_url,
&css_data, &String::from_utf8_lossy(&link_href_data),
opt_no_fonts, opt_no_fonts,
opt_no_images, opt_no_images,
opt_silent, opt_silent,
); );
data_to_data_url( let link_href_data_url = data_to_data_url(
"text/css", "text/css",
css.as_bytes(), css.as_bytes(),
&final_url, &link_href_final_url,
"", "",
) );
// Add new data URL href attribute
attrs_mut.push(Attribute {
name: QualName::new(
None,
ns!(),
local_name!("href"),
),
value: Tendril::from_slice(
link_href_data_url.as_ref(),
),
});
} }
// If a network error occured, warn
Err(e) => {
eprintln!("Warning: {}", e);
// If failed to resolve, replace with absolute URL
href_full_url
} }
}; Err(_) => {
// Keep remote reference if unable to retrieve the asset
attr.value.clear(); if is_http_url(link_href_full_url.clone()) {
attr.value.push_slice(&replacement_text); attrs_mut.push(Attribute {
name: QualName::new(
None,
ns!(),
local_name!("href"),
),
value: Tendril::from_slice(
link_href_full_url.as_ref(),
),
});
}
} }
} }
} }
} }
LinkType::Preload | LinkType::DnsPrefetch => { LinkType::Preload | LinkType::DnsPrefetch => {
// Since all resources are embedded as data URL, preloading and prefetching are unnecessary // Since all resources are embedded as data URL, preloading and prefetching are unnecessary
if let Some(attr) = for _ in 0..attrs_mut.len() {
attrs_mut.iter_mut().find(|a| &a.name.local == "href") attrs_mut.remove(0);
{
attr.value.clear();
} }
} }
LinkType::Unknown => { LinkType::Unknown => {
for attr in attrs_mut.iter_mut() { for attr in attrs_mut.iter_mut() {
if &attr.name.local == "href" { let attr_name: &str = &attr.name.local;
if attr_name.eq_ignore_ascii_case("href") {
let href_full_url = let href_full_url =
resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); resolve_url(&url, attr.value.trim()).unwrap_or_default();
attr.value.clear(); attr.value.clear();
attr.value.push_slice(&href_full_url.as_str()); attr.value.push_slice(&href_full_url.as_str());
} }
@ -212,141 +308,113 @@ pub fn walk_and_embed_assets(
} }
} }
"body" => { "body" => {
// Find and remove background attribute(s), keep reference to the last one // Find and remove background attribute(s), keep value of the last found one
let mut found_background: Option<Attribute> = None; let mut background: String = str!();
let mut i = 0; let mut i = 0;
while i < attrs_mut.len() { while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref(); let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("background") { if attr_name.eq_ignore_ascii_case("background") {
found_background = Some(attrs_mut.remove(i)); background = str!(attrs_mut.remove(i).value.trim());
} else { } else {
i += 1; i += 1;
} }
} }
if !opt_no_images { if !opt_no_images && !background.is_empty() {
if let Some((data_url, _)) = found_background let background_full_url = resolve_url(&url, background).unwrap_or_default();
.iter() let background_url_fragment = get_url_fragment(background_full_url.clone());
.map(|attr| attr.value.trim()) match retrieve_asset(cache, client, &url, &background_full_url, opt_silent)
.filter(|background| !background.is_empty()) // Skip if empty
.next()
.and_then(|background| resolve_url(&url, background).ok()) // Make absolute
.and_then(|abs_src| // Download and convert to data_url
retrieve_asset(
cache,
client,
&url,
&abs_src,
true,
"",
opt_silent,
).ok())
{ {
// Add new data_url background attribute Ok((background_data, background_final_url, background_media_type)) => {
let background_data_url = data_to_data_url(
&background_media_type,
&background_data,
&background_final_url,
&background_url_fragment,
);
// Add new data URL background attribute
attrs_mut.push(Attribute { attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("background")), name: QualName::new(None, ns!(), local_name!("background")),
value: Tendril::from_slice(data_url.as_ref()), value: Tendril::from_slice(background_data_url.as_ref()),
}); });
} }
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(background_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("background")),
value: Tendril::from_slice(background_full_url.as_ref()),
});
}
}
}
} }
} }
"img" => { "img" => {
// Find source attribute(s) // Find source attribute(s)
let mut found_src: Option<Attribute> = None; let mut img_src: String = str!();
let mut found_datasrc: Option<Attribute> = None; let mut img_data_src: String = str!();
let mut i = 0; let mut i = 0;
while i < attrs_mut.len() { while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref(); let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("src") { if attr_name.eq_ignore_ascii_case("src") {
found_src = Some(attrs_mut.remove(i)); img_src = str!(attrs_mut.remove(i).value.trim());
} else if attr_name.eq_ignore_ascii_case("data-src") { } else if attr_name.eq_ignore_ascii_case("data-src") {
found_datasrc = Some(attrs_mut.remove(i)); img_data_src = str!(attrs_mut.remove(i).value.trim());
} else { } else {
i += 1; i += 1;
} }
} }
// If images are disabled, clear both sources
if opt_no_images { if opt_no_images {
// Add empty image src attribute
attrs_mut.push(Attribute { attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")), name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(empty_image!()), value: Tendril::from_slice(empty_image!()),
}); });
} else if let Some((data_url, _)) = found_datasrc
.iter()
.chain(&found_src) // Give data_url priority
.map(|attr| attr.value.trim())
.filter(|src| !src.is_empty()) // Skip if empty
.next()
.and_then(|src| resolve_url(&url, src).ok()) // Make absolute
.and_then(|abs_src| // Download and convert to data_url
retrieve_asset(
cache,
client,
&url,
&abs_src,
true,
"",
opt_silent,
).ok())
{
// Add new data_url src attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(data_url.as_ref()),
});
}
}
"input" => {
let mut is_image: bool = false;
for attr in attrs_mut.iter_mut() {
let attr_name: &str = &attr.name.local;
if attr_name == "type" {
is_image = attr.value.to_string().eq_ignore_ascii_case("image");
}
}
if is_image {
let mut found_src: Option<Attribute> = None;
let mut i = 0;
while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref();
if attr_name.eq_ignore_ascii_case("src") {
found_src = Some(attrs_mut.remove(i));
} else { } else {
i += 1; if img_src.is_empty() && img_data_src.is_empty() {
} // Add empty src attribute
}
// If images are disabled, clear both sources
if opt_no_images {
attrs_mut.push(Attribute { attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")), name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(empty_image!()), value: Tendril::from_slice(""),
}); });
} else if let Some((data_url, _)) = found_src } else {
.iter() // Add data URL src attribute
.map(|attr| attr.value.trim()) let img_full_url = resolve_url(
.filter(|src| !src.is_empty()) // Skip if empty
.next()
.and_then(|src| resolve_url(&url, src).ok()) // Make absolute
.and_then(|abs_src| // Download and convert to data_url
retrieve_asset(
cache,
client,
&url, &url,
&abs_src, if !img_data_src.is_empty() {
true, img_data_src
"", } else {
opt_silent, img_src
).ok()) },
{ )
// Add new data_url src attribute .unwrap_or_default();
let img_url_fragment = get_url_fragment(img_full_url.clone());
match retrieve_asset(cache, client, &url, &img_full_url, opt_silent) {
Ok((img_data, img_final_url, img_media_type)) => {
let img_data_url = data_to_data_url(
&img_media_type,
&img_data,
&img_final_url,
&img_url_fragment,
);
attrs_mut.push(Attribute { attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")), name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(data_url.as_ref()), value: Tendril::from_slice(img_data_url.as_ref()),
}); });
} }
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(img_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(img_full_url.as_ref()),
});
}
}
}
}
} }
} }
"svg" => { "svg" => {
@ -354,57 +422,135 @@ pub fn walk_and_embed_assets(
node.children.borrow_mut().clear(); node.children.borrow_mut().clear();
} }
} }
"image" => { "input" => {
// Find and remove (xlink:)href attribute(s), keep reference to the last one // Determine input type
let mut image_href: Option<Attribute> = None; let mut is_image_input: bool = false;
for attr in attrs_mut.iter_mut() {
let attr_name: &str = &attr.name.local;
if attr_name.eq_ignore_ascii_case("type") {
is_image_input = attr.value.to_string().eq_ignore_ascii_case("image");
}
}
if is_image_input {
let mut input_image_src: String = str!();
let mut i = 0; let mut i = 0;
while i < attrs_mut.len() { while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref(); let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("xlink:href") if attr_name.eq_ignore_ascii_case("src") {
|| attr_name.eq_ignore_ascii_case("href") input_image_src = str!(attrs_mut.remove(i).value.trim());
{
image_href = Some(attrs_mut.remove(i));
} else { } else {
i += 1; i += 1;
} }
} }
if !opt_no_images { if opt_no_images || input_image_src.is_empty() {
if let Some((data_url, _)) = image_href attrs_mut.push(Attribute {
.iter() name: QualName::new(None, ns!(), local_name!("src")),
.map(|attr| attr.value.trim()) value: Tendril::from_slice(if input_image_src.is_empty() {
.filter(|href| !href.is_empty()) // Skip if empty ""
.next() } else {
.and_then(|href| resolve_url(&url, href).ok()) // Make absolute empty_image!()
.and_then(|abs_href| // Download and convert to data_url }),
retrieve_asset( });
} else {
let input_image_full_url =
resolve_url(&url, input_image_src).unwrap_or_default();
let input_image_url_fragment =
get_url_fragment(input_image_full_url.clone());
match retrieve_asset(
cache, cache,
client, client,
&url, &url,
&abs_href, &input_image_full_url,
true,
"",
opt_silent, opt_silent,
).ok()) ) {
Ok((
input_image_data,
input_image_final_url,
input_image_media_type,
)) => {
let input_image_data_url = data_to_data_url(
&input_image_media_type,
&input_image_data,
&input_image_final_url,
&input_image_url_fragment,
);
// Add data URL src attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(input_image_data_url.as_ref()),
});
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(input_image_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(
input_image_full_url.as_ref(),
),
});
}
}
}
}
}
}
"image" => {
// Find and remove (xlink:)href attribute(s), keep value of the last one
let mut image_href: String = str!();
let mut i = 0;
while i < attrs_mut.len() {
let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("xlink:href")
|| attr_name.eq_ignore_ascii_case("href")
{ {
// Add new data_url href attribute image_href = str!(attrs_mut.remove(i).value.trim());
} else {
i += 1;
}
}
if !opt_no_images && !image_href.is_empty() {
let image_full_url = resolve_url(&url, image_href).unwrap_or_default();
let image_url_fragment = get_url_fragment(image_full_url.clone());
match retrieve_asset(cache, client, &url, &image_full_url, opt_silent) {
Ok((image_data, image_final_url, image_media_type)) => {
let image_data_url = data_to_data_url(
&image_media_type,
&image_data,
&image_final_url,
&image_url_fragment,
);
// Add new data URL href attribute
attrs_mut.push(Attribute { attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("href")), name: QualName::new(None, ns!(), local_name!("href")),
value: Tendril::from_slice(data_url.as_ref()), value: Tendril::from_slice(image_data_url.as_ref()),
}); });
} }
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(image_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("href")),
value: Tendril::from_slice(image_full_url.as_ref()),
});
}
}
}
} }
} }
"source" => { "source" => {
for attr in attrs_mut.iter_mut() { for attr in attrs_mut.iter_mut() {
let attr_name: &str = &attr.name.local; let attr_name: &str = &attr.name.local;
if attr_name == "src" { if attr_name.eq_ignore_ascii_case("src") {
let src_full_url = resolve_url(&url, attr.value.trim()) let src_full_url = resolve_url(&url, attr.value.trim())
.unwrap_or_else(|_| attr.value.to_string()); .unwrap_or_else(|_| attr.value.to_string());
attr.value.clear(); attr.value.clear();
attr.value.push_slice(src_full_url.as_str()); attr.value.push_slice(src_full_url.as_str());
} else if attr_name == "srcset" { } else if attr_name.eq_ignore_ascii_case("srcset") {
if get_node_name(&get_parent_node(&node)) == Some("picture") { if get_node_name(&get_parent_node(&node)) == Some("picture") {
if opt_no_images { if opt_no_images {
attr.value.clear(); attr.value.clear();
@ -412,18 +558,38 @@ pub fn walk_and_embed_assets(
} else { } else {
let srcset_full_url = let srcset_full_url =
resolve_url(&url, attr.value.trim()).unwrap_or_default(); resolve_url(&url, attr.value.trim()).unwrap_or_default();
let (source_data_url, _) = retrieve_asset( let srcset_url_fragment =
get_url_fragment(srcset_full_url.clone());
match retrieve_asset(
cache, cache,
client, client,
&url, &url,
&srcset_full_url, &srcset_full_url,
true,
"",
opt_silent, opt_silent,
) ) {
.unwrap_or((str!(), str!())); Ok((srcset_data, srcset_final_url, srcset_media_type)) => {
let srcset_data_url = data_to_data_url(
&srcset_media_type,
&srcset_data,
&srcset_final_url,
&srcset_url_fragment,
);
attr.value.clear(); attr.value.clear();
attr.value.push_slice(source_data_url.as_str()); attr.value.push_slice(srcset_data_url.as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(srcset_full_url.clone()) {
attr.value.clear();
attr.value.push_slice(srcset_full_url.as_str());
if !srcset_url_fragment.is_empty() {
attr.value.push_slice("#");
attr.value
.push_slice(srcset_url_fragment.as_str());
}
}
}
}
} }
} }
} }
@ -431,7 +597,8 @@ pub fn walk_and_embed_assets(
} }
"a" | "area" => { "a" | "area" => {
for attr in attrs_mut.iter_mut() { for attr in attrs_mut.iter_mut() {
if &attr.name.local == "href" { let attr_name: &str = &attr.name.local;
if attr_name.eq_ignore_ascii_case("href") {
let attr_value = attr.value.trim(); let attr_value = attr.value.trim();
if opt_no_js && attr_value.starts_with("javascript:") { if opt_no_js && attr_value.starts_with("javascript:") {
@ -453,44 +620,55 @@ pub fn walk_and_embed_assets(
} }
} }
"script" => { "script" => {
// Remove integrity attributes // Remove integrity and src attributes, keep values of the last ones
let mut script_integrity: String = str!();
let mut script_src: String = str!();
let mut i = 0; let mut i = 0;
while i < attrs_mut.len() { while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref(); let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("integrity") { if attr_name.eq_ignore_ascii_case("integrity") {
attrs_mut.remove(i); script_integrity = str!(attrs_mut.remove(i).value.trim());
} else if attr_name.eq_ignore_ascii_case("src") {
script_src = str!(attrs_mut.remove(i).value.trim());
} else { } else {
i += 1; i += 1;
} }
} }
if opt_no_js { if opt_no_js {
// Empty src and inner content of SCRIPT tags // Empty inner content (src is already gone)
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "src" {
attr.value.clear();
}
}
node.children.borrow_mut().clear(); node.children.borrow_mut().clear();
} else { } else if !script_src.is_empty() {
for attr in attrs_mut.iter_mut() { let script_full_url = resolve_url(&url, script_src).unwrap_or_default();
if &attr.name.local == "src" { match retrieve_asset(cache, client, &url, &script_full_url, opt_silent) {
let src_full_url = Ok((script_data, script_final_url, _script_media_type)) => {
resolve_url(&url, attr.value.trim()).unwrap_or_default(); // Only embed if we're able to validate integrity
let (js_data_url, _) = retrieve_asset( if script_integrity.is_empty()
cache, || has_proper_integrity(&script_data, &script_integrity)
client, {
&url, let script_data_url = data_to_data_url(
&src_full_url,
true,
"application/javascript", "application/javascript",
opt_silent, &script_data,
) &script_final_url,
.unwrap_or((str!(), str!())); "",
attr.value.clear(); );
attr.value.push_slice(js_data_url.as_str()); // Add new data URL src attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(script_data_url.as_ref()),
});
} }
} }
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(script_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(script_full_url.as_ref()),
});
}
}
};
} }
} }
"style" => { "style" => {
@ -518,21 +696,23 @@ pub fn walk_and_embed_assets(
} }
"form" => { "form" => {
for attr in attrs_mut.iter_mut() { for attr in attrs_mut.iter_mut() {
if &attr.name.local == "action" { let attr_name: &str = &attr.name.local;
let attr_value = attr.value.trim(); if attr_name.eq_ignore_ascii_case("action") {
// Modify action to be a full URL let form_action = attr.value.trim();
if !is_http_url(attr_value) { // Modify action property to ensure it's a full URL
let href_full_url = if !is_http_url(form_action) {
resolve_url(&url, attr_value).unwrap_or_default(); let form_action_full_url =
resolve_url(&url, form_action).unwrap_or_default();
attr.value.clear(); attr.value.clear();
attr.value.push_slice(href_full_url.as_str()); attr.value.push_slice(form_action_full_url.as_str());
} }
} }
} }
} }
"frame" | "iframe" => { "frame" | "iframe" => {
for attr in attrs_mut.iter_mut() { for attr in attrs_mut.iter_mut() {
if &attr.name.local == "src" { let attr_name: &str = &attr.name.local;
if attr_name.eq_ignore_ascii_case("src") {
if opt_no_frames { if opt_no_frames {
// Empty the src attribute // Empty the src attribute
attr.value.clear(); attr.value.clear();
@ -546,23 +726,17 @@ pub fn walk_and_embed_assets(
continue; continue;
} }
let src_full_url = resolve_url(&url, frame_src).unwrap_or_default(); let frame_full_url = resolve_url(&url, frame_src).unwrap_or_default();
let (frame_data, frame_final_url) = retrieve_asset( let frame_url_fragment = get_url_fragment(frame_full_url.clone());
cache, match retrieve_asset(cache, client, &url, &frame_full_url, opt_silent) {
client, Ok((frame_data, frame_final_url, frame_media_type)) => {
&url, let frame_dom =
&src_full_url, html_to_dom(&String::from_utf8_lossy(&frame_data));
false,
"text/html",
opt_silent,
)
.unwrap_or((str!(), src_full_url));
let dom = html_to_dom(&frame_data);
walk_and_embed_assets( walk_and_embed_assets(
cache, cache,
client, client,
&frame_final_url, &frame_final_url,
&dom.document, &frame_dom.document,
opt_no_css, opt_no_css,
opt_no_fonts, opt_no_fonts,
opt_no_frames, opt_no_frames,
@ -570,41 +744,81 @@ pub fn walk_and_embed_assets(
opt_no_images, opt_no_images,
opt_silent, opt_silent,
); );
let mut buf: Vec<u8> = Vec::new(); let mut frame_data: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); serialize(
let iframe_data_url = data_to_data_url("text/html", &buf, "", ""); &mut frame_data,
&frame_dom.document,
SerializeOpts::default(),
)
.unwrap();
let frame_data_url = data_to_data_url(
&frame_media_type,
&frame_data,
&frame_final_url,
&frame_url_fragment,
);
attr.value.clear(); attr.value.clear();
attr.value.push_slice(iframe_data_url.as_str()); attr.value.push_slice(frame_data_url.as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(frame_full_url.clone()) {
attr.value.clear();
attr.value.push_slice(frame_full_url.as_str());
}
}
}
} }
} }
} }
"video" => { "video" => {
for attr in attrs_mut.iter_mut() { for attr in attrs_mut.iter_mut() {
if &attr.name.local == "poster" { let attr_name: &str = &attr.name.local;
let video_poster = attr.value.trim(); if attr_name.eq_ignore_ascii_case("poster") {
let video_poster_url = attr.value.trim();
// Skip posters with empty source // Skip posters with empty source
if video_poster.is_empty() { if video_poster_url.is_empty() {
continue; continue;
} }
if opt_no_images { if opt_no_images {
attr.value.clear(); attr.value.clear();
} else { continue;
let poster_full_url = }
resolve_url(&url, video_poster).unwrap_or_default();
let (poster_data_url, _) = retrieve_asset( let video_poster_full_url =
resolve_url(&url, video_poster_url).unwrap_or_default();
let video_poster_url_fragment =
get_url_fragment(video_poster_full_url.clone());
match retrieve_asset(
cache, cache,
client, client,
&url, &url,
&poster_full_url, &video_poster_full_url,
true,
"",
opt_silent, opt_silent,
) ) {
.unwrap_or((poster_full_url, str!())); Ok((
video_poster_data,
video_poster_final_url,
video_poster_media_type,
)) => {
let video_poster_data_url = data_to_data_url(
&video_poster_media_type,
&video_poster_data,
&video_poster_final_url,
&video_poster_url_fragment,
);
attr.value.clear(); attr.value.clear();
attr.value.push_slice(poster_data_url.as_str()); attr.value.push_slice(video_poster_data_url.as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(video_poster_full_url.clone()) {
attr.value.clear();
attr.value.push_slice(video_poster_full_url.as_str());
}
}
} }
} }
} }
@ -615,16 +829,15 @@ pub fn walk_and_embed_assets(
// Process style attributes // Process style attributes
if opt_no_css { if opt_no_css {
// Get rid of style attributes // Get rid of style attributes
let mut style_attr_indexes = Vec::new(); let mut i = 0;
for (i, attr) in attrs_mut.iter_mut().enumerate() { while i < attrs_mut.len() {
if attr.name.local.as_ref().eq_ignore_ascii_case("style") { let attr_name: &str = &attrs_mut[i].name.local;
style_attr_indexes.push(i); if attr_name.eq_ignore_ascii_case("style") {
attrs_mut.remove(i);
} else {
i += 1;
} }
} }
style_attr_indexes.reverse();
for attr_index in style_attr_indexes {
attrs_mut.remove(attr_index);
}
} else { } else {
// Otherwise, parse any links found in the attributes // Otherwise, parse any links found in the attributes
for attribute in attrs_mut for attribute in attrs_mut

View File

@ -1,5 +1,5 @@
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets}; use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset}; use monolith::utils::{data_url_to_data, is_data_url, is_file_url, is_http_url, retrieve_asset};
use reqwest::blocking::Client; use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use reqwest::Url; use reqwest::Url;
@ -110,26 +110,24 @@ fn main() {
// Retrieve root document // Retrieve root document
if is_file_url(target_url) || is_http_url(target_url) { if is_file_url(target_url) || is_http_url(target_url) {
let (data, final_url) = retrieve_asset( match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) {
&mut cache, Ok((data, final_url, _media_type)) => {
&client,
target_url,
target_url,
false,
"",
app_args.silent,
)
.expect("Could not retrieve target document");
base_url = final_url; base_url = final_url;
dom = html_to_dom(&data); dom = html_to_dom(&String::from_utf8_lossy(&data));
}
Err(_) => {
eprintln!("Could not retrieve target document");
process::exit(1);
}
}
} else if is_data_url(target_url) { } else if is_data_url(target_url) {
let (media_type, text): (String, String) = data_url_to_text(target_url); let (media_type, data): (String, Vec<u8>) = data_url_to_data(target_url);
if !media_type.eq_ignore_ascii_case("text/html") { if !media_type.eq_ignore_ascii_case("text/html") {
eprintln!("Unsupported data URL media type"); eprintln!("Unsupported data URL media type");
process::exit(1); process::exit(1);
} }
base_url = str!(target_url); base_url = str!(target_url);
dom = html_to_dom(&text); dom = html_to_dom(&String::from_utf8_lossy(&data));
} else { } else {
process::exit(1); process::exit(1);
} }
@ -163,8 +161,8 @@ fn main() {
let mut clean_url = Url::parse(&base_url).unwrap(); let mut clean_url = Url::parse(&base_url).unwrap();
clean_url.set_fragment(None); clean_url.set_fragment(None);
// Don't include credentials // Don't include credentials
clean_url.set_username(""); clean_url.set_username("").unwrap();
clean_url.set_password(None); clean_url.set_password(None).unwrap();
let metadata_comment = if is_http_url(&base_url) { let metadata_comment = if is_http_url(&base_url) {
format!( format!(
"<!-- Saved from {} at {} using {} v{} -->\n", "<!-- Saved from {} at {} using {} v{} -->\n",

View File

@ -242,9 +242,9 @@ fn passing_local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
<!DOCTYPE html><html lang=\"en\"><head>\n \ <!DOCTYPE html><html lang=\"en\"><head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \ <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \ <title>Local HTML file</title>\n \
<link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n \ <link rel=\"stylesheet\" type=\"text/css\" href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\">\n \
<link href=\"data:text/css;base64,\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \ <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\" src=\"\">\n \ <img alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \ <a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \ <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\ <script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
@ -306,12 +306,12 @@ fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box<dyn
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \ <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \ <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \ <title>Local HTML file</title>\n \
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \ <link rel=\"stylesheet\" type=\"text/css\">\n \
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \ <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\" src=\"{empty_image}\">\n \ <img alt=\"\" src=\"{empty_image}\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \ <a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \ <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"\"></script>\n\n\n\n\ <script></script>\n\n\n\n\
</body></html>\n\ </body></html>\n\
", ",
empty_image = empty_image!() empty_image = empty_image!()
@ -368,12 +368,12 @@ fn passing_local_file_url_target_input() -> Result<(), Box<dyn std::error::Error
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \ <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \ <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \ <title>Local HTML file</title>\n \
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \ <link rel=\"stylesheet\" type=\"text/css\">\n \
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \ <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\" src=\"{empty_image}\">\n \ <img alt=\"\" src=\"{empty_image}\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \ <a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \ <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"\"></script>\n\n\n\n\ <script></script>\n\n\n\n\
</body></html>\n\ </body></html>\n\
", ",
empty_image = empty_image!() empty_image = empty_image!()
@ -417,7 +417,7 @@ fn passing_security_disallow_local_assets_within_data_url_targets(
// STDOUT should contain HTML with no JS in it // STDOUT should contain HTML with no JS in it
assert_eq!( assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(), std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><script src=\"\"></script></head><body></body></html>\n" "<html><head><script></script></head><body></body></html>\n"
); );
// STDERR should be empty // STDERR should be empty

View File

@ -0,0 +1,92 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::html::has_proper_integrity;

    /// Non-empty payload shared by the algorithm-specific checks below.
    const SAMPLE: &str = "abcdef0123456789";

    /// Empty input paired with its matching SHA-256 integrity string validates.
    #[test]
    fn empty_input_sha256() {
        let integrity = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=";
        assert!(has_proper_integrity("".as_bytes(), integrity));
    }

    /// A matching SHA-256 integrity string is accepted for the sample payload.
    #[test]
    fn sha256() {
        let integrity = "sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM=";
        assert!(has_proper_integrity(SAMPLE.as_bytes(), integrity));
    }

    /// A matching SHA-384 integrity string is accepted for the sample payload.
    #[test]
    fn sha384() {
        let integrity =
            "sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw";
        assert!(has_proper_integrity(SAMPLE.as_bytes(), integrity));
    }

    /// A matching SHA-512 integrity string is accepted for the sample payload.
    #[test]
    fn sha512() {
        let integrity = "sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww==";
        assert!(has_proper_integrity(SAMPLE.as_bytes(), integrity));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::html::has_proper_integrity;

    /// Non-empty payload shared by the rejection checks below.
    const SAMPLE: &str = "abcdef0123456789";

    /// An empty integrity string never validates non-empty data.
    #[test]
    fn empty_hash() {
        let accepted = has_proper_integrity(SAMPLE.as_bytes(), "");
        assert!(!accepted);
    }

    /// An empty integrity string does not validate empty data either.
    #[test]
    fn empty_input_empty_hash() {
        let accepted = has_proper_integrity("".as_bytes(), "");
        assert!(!accepted);
    }

    /// A SHA-256 entry whose digest does not match the data is rejected.
    #[test]
    fn sha256() {
        let accepted = has_proper_integrity(SAMPLE.as_bytes(), "sha256-badhash");
        assert!(!accepted);
    }

    /// A SHA-384 entry whose digest does not match the data is rejected.
    #[test]
    fn sha384() {
        let accepted = has_proper_integrity(SAMPLE.as_bytes(), "sha384-badhash");
        assert!(!accepted);
    }

    /// A SHA-512 entry whose digest does not match the data is rejected.
    #[test]
    fn sha512() {
        let accepted = has_proper_integrity(SAMPLE.as_bytes(), "sha512-badhash");
        assert!(!accepted);
    }
}

View File

@ -1,4 +1,5 @@
mod get_node_name; mod get_node_name;
mod has_proper_integrity;
mod is_icon; mod is_icon;
mod stringify_document; mod stringify_document;
mod walk_and_embed_assets; mod walk_and_embed_assets;

View File

@ -162,7 +162,7 @@ fn passing_no_css() {
buf.iter().map(|&c| c as char).collect::<String>(), buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\ "<html>\
<head>\ <head>\
<link rel=\"stylesheet\" href=\"\">\ <link rel=\"stylesheet\">\
<style></style>\ <style></style>\
</head>\ </head>\
<body>\ <body>\
@ -210,7 +210,7 @@ fn passing_no_images() {
format!( format!(
"<html>\ "<html>\
<head>\ <head>\
<link rel=\"icon\" href=\"\">\ <link rel=\"icon\">\
</head>\ </head>\
<body>\ <body>\
<div>\ <div>\
@ -372,7 +372,7 @@ fn passing_no_js() {
assert_eq!( assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(), buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><script src=\"\"></script>\ "<html><head></head><body><div><script></script>\
<script></script></div></body></html>" <script></script></div></body></html>"
); );
} }
@ -412,7 +412,7 @@ fn passing_with_no_integrity() {
assert_eq!( assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(), buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\ "<html>\
<head><title>No integrity</title><link rel=\"something\"><script src=\"\"></script></head>\ <head><title>No integrity</title><link rel=\"something\"><script></script></head>\
<body></body>\ <body></body>\
</html>" </html>"
); );

View File

@ -9,74 +9,74 @@ use crate::utils;
#[test] #[test]
fn passing_parse_text_html_base64() { fn passing_parse_text_html_base64() {
let (media_type, text) = utils::data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="); let (media_type, data) = utils::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
assert_eq!(media_type, "text/html"); assert_eq!(media_type, "text/html");
assert_eq!( assert_eq!(
text, String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion" "Work expands so as to fill the time available for its completion"
); );
} }
#[test] #[test]
fn passing_parse_text_html_utf8() { fn passing_parse_text_html_utf8() {
let (media_type, text) = utils::data_url_to_text( let (media_type, data) = utils::data_url_to_data(
"data:text/html;utf8,Work expands so as to fill the time available for its completion", "data:text/html;utf8,Work expands so as to fill the time available for its completion",
); );
assert_eq!(media_type, "text/html"); assert_eq!(media_type, "text/html");
assert_eq!( assert_eq!(
text, String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion" "Work expands so as to fill the time available for its completion"
); );
} }
#[test] #[test]
fn passing_parse_text_html_plaintext() { fn passing_parse_text_html_plaintext() {
let (media_type, text) = utils::data_url_to_text( let (media_type, data) = utils::data_url_to_data(
"data:text/html,Work expands so as to fill the time available for its completion", "data:text/html,Work expands so as to fill the time available for its completion",
); );
assert_eq!(media_type, "text/html"); assert_eq!(media_type, "text/html");
assert_eq!( assert_eq!(
text, String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion" "Work expands so as to fill the time available for its completion"
); );
} }
#[test] #[test]
fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() { fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
let (media_type, text) = utils::data_url_to_text(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion "); let (media_type, data) = utils::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
assert_eq!(media_type, "text/html"); assert_eq!(media_type, "text/html");
assert_eq!( assert_eq!(
text, String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion" "Work expands so as to fill the time available for its completion"
); );
} }
#[test] #[test]
fn passing_parse_text_css_url_encoded() { fn passing_parse_text_css_url_encoded() {
let (media_type, text) = utils::data_url_to_text("data:text/css,div{background-color:%23000}"); let (media_type, data) = utils::data_url_to_data("data:text/css,div{background-color:%23000}");
assert_eq!(media_type, "text/css"); assert_eq!(media_type, "text/css");
assert_eq!(text, "div{background-color:#000}"); assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
} }
#[test] #[test]
fn passing_parse_no_media_type_base64() { fn passing_parse_no_media_type_base64() {
let (media_type, text) = utils::data_url_to_text("data:;base64,dGVzdA=="); let (media_type, data) = utils::data_url_to_data("data:;base64,dGVzdA==");
assert_eq!(media_type, ""); assert_eq!(media_type, "");
assert_eq!(text, "test"); assert_eq!(String::from_utf8_lossy(&data), "test");
} }
#[test] #[test]
fn passing_parse_no_media_type_no_encoding() { fn passing_parse_no_media_type_no_encoding() {
let (media_type, text) = utils::data_url_to_text("data:;,test%20test"); let (media_type, data) = utils::data_url_to_data("data:;,test%20test");
assert_eq!(media_type, ""); assert_eq!(media_type, "");
assert_eq!(text, "test test"); assert_eq!(String::from_utf8_lossy(&data), "test test");
} }
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@ -88,8 +88,8 @@ fn passing_parse_no_media_type_no_encoding() {
#[test] #[test]
fn failing_just_word_data() { fn failing_just_word_data() {
let (media_type, text) = utils::data_url_to_text("data"); let (media_type, data) = utils::data_url_to_data("data");
assert_eq!(media_type, ""); assert_eq!(media_type, "");
assert_eq!(text, ""); assert_eq!(String::from_utf8_lossy(&data), "");
} }

View File

@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@ -7,8 +5,12 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::utils;
#[test] #[test]
fn passing_decode_unicode_characters() { fn decode_unicode_characters() {
assert_eq!( assert_eq!(
utils::decode_url(str!( utils::decode_url(str!(
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5" "%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
@ -18,7 +20,7 @@ fn passing_decode_unicode_characters() {
} }
#[test] #[test]
fn passing_decode_file_url() { fn decode_file_url() {
assert_eq!( assert_eq!(
utils::decode_url(str!("file:///tmp/space%20here/test%231.html")), utils::decode_url(str!("file:///tmp/space%20here/test%231.html")),
"file:///tmp/space here/test#1.html" "file:///tmp/space here/test#1.html"
@ -26,7 +28,7 @@ fn passing_decode_file_url() {
} }
#[test] #[test]
fn passing_plus_sign() { fn plus_sign() {
assert_eq!( assert_eq!(
utils::decode_url(str!( utils::decode_url(str!(
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic" "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
@ -34,3 +36,4 @@ fn passing_plus_sign() {
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic" "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
); );
} }
}

View File

@ -1,6 +1,6 @@
mod clean_url; mod clean_url;
mod data_to_data_url; mod data_to_data_url;
mod data_url_to_text; mod data_url_to_data;
mod decode_url; mod decode_url;
mod detect_media_type; mod detect_media_type;
mod file_url_to_fs_path; mod file_url_to_fs_path;

View File

@ -17,38 +17,23 @@ fn passing_read_data_url() {
// If both source and target are data URLs, // If both source and target are data URLs,
// ensure the result contains target data URL // ensure the result contains target data URL
let (retrieved_data, final_url) = utils::retrieve_asset( let (data, final_url, media_type) = utils::retrieve_asset(
cache, cache,
&client, &client,
"data:text/html;base64,SoUrCe", "data:text/html;base64,c291cmNl",
"data:text/html;base64,TaRgEt", "data:text/html;base64,dGFyZ2V0",
true,
"",
false, false,
) )
.unwrap(); .unwrap();
assert_eq!(&retrieved_data, "data:text/html;base64,TaRgEt"); assert_eq!(
assert_eq!(&final_url, "data:text/html;base64,TaRgEt"); utils::data_to_data_url(&media_type, &data, &final_url, ""),
} utils::data_to_data_url("text/html", "target".as_bytes(), "", "")
);
#[test] assert_eq!(
fn passing_read_data_url_ignore_suggested_media_type() { final_url,
let cache = &mut HashMap::new(); utils::data_to_data_url("text/html", "target".as_bytes(), "", "")
let client = Client::new(); );
assert_eq!(&media_type, "text/html");
// Media type parameter should not influence data URLs
let (data, final_url) = utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"data:text/html;base64,TaRgEt",
true,
"image/png",
false,
)
.unwrap();
assert_eq!(&data, "data:text/html;base64,TaRgEt");
assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
} }
#[test] #[test]
@ -60,7 +45,7 @@ fn passing_read_local_file_with_file_url_parent() {
// Inclusion of local assets from local sources should be allowed // Inclusion of local assets from local sources should be allowed
let cwd = env::current_dir().unwrap(); let cwd = env::current_dir().unwrap();
let (data, final_url) = utils::retrieve_asset( let (data, final_url, _media_type) = utils::retrieve_asset(
cache, cache,
&client, &client,
&format!( &format!(
@ -73,12 +58,10 @@ fn passing_read_local_file_with_file_url_parent() {
file = file_url_protocol, file = file_url_protocol,
cwd = cwd.to_str().unwrap() cwd = cwd.to_str().unwrap()
), ),
true,
"application/javascript",
false, false,
) )
.unwrap(); .unwrap();
assert_eq!(&data, "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="); assert_eq!(utils::data_to_data_url("application/javascript", &data, &final_url, ""), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
assert_eq!( assert_eq!(
&final_url, &final_url,
&format!( &format!(
@ -102,18 +85,20 @@ fn failing_read_local_file_with_data_url_parent() {
let client = Client::new(); let client = Client::new();
// Inclusion of local assets from data URL sources should not be allowed // Inclusion of local assets from data URL sources should not be allowed
let (data, final_url) = utils::retrieve_asset( match utils::retrieve_asset(
cache, cache,
&client, &client,
"data:text/html;base64,SoUrCe", "data:text/html;base64,SoUrCe",
"file:///etc/passwd", "file:///etc/passwd",
true,
"",
false, false,
) ) {
.unwrap(); Ok((..)) => {
assert_eq!(&data, ""); assert!(false);
assert_eq!(&final_url, ""); }
Err(_) => {
assert!(true);
}
}
} }
#[test] #[test]
@ -122,16 +107,18 @@ fn failing_read_local_file_with_https_parent() {
let client = Client::new(); let client = Client::new();
// Inclusion of local assets from remote sources should not be allowed // Inclusion of local assets from remote sources should not be allowed
let (data, final_url) = utils::retrieve_asset( match utils::retrieve_asset(
cache, cache,
&client, &client,
"https://kernel.org/", "https://kernel.org/",
"file:///etc/passwd", "file:///etc/passwd",
true,
"",
false, false,
) ) {
.unwrap(); Ok((..)) => {
assert_eq!(&data, ""); assert!(false);
assert_eq!(&final_url, ""); }
Err(_) => {
assert!(true);
}
}
} }

View File

@ -140,21 +140,19 @@ pub fn clean_url<T: AsRef<str>>(input: T) -> String {
result result
} }
pub fn data_url_to_text<T: AsRef<str>>(url: T) -> (String, String) { pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap()); let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string(); let path: String = parsed_url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len()); let comma_loc: usize = path.find(',').unwrap_or(path.len());
let meta_data: String = path.chars().take(comma_loc).collect(); let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect(); let raw_data: String = path.chars().skip(comma_loc + 1).collect();
let data: String = decode_url(raw_data); let text: String = decode_url(raw_data);
let meta_data_items: Vec<&str> = meta_data.split(';').collect(); let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut encoding: &str = "";
let mut media_type: String = str!(); let mut media_type: String = str!();
let mut text: String = str!(); let mut encoding: &str = "";
let mut i: i8 = 0; let mut i: i8 = 0;
for item in &meta_data_items { for item in &meta_data_items {
@ -172,15 +170,13 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> (String, String) {
i = i + 1; i = i + 1;
} }
if is_plaintext_media_type(&media_type) || media_type.is_empty() { let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
if encoding.eq_ignore_ascii_case("base64") { base64::decode(&text).unwrap_or(vec![])
text = String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
} else { } else {
text = data text.as_bytes().to_vec()
} };
}
(media_type, text) (media_type, data)
} }
pub fn decode_url(input: String) -> String { pub fn decode_url(input: String) -> String {
@ -228,74 +224,52 @@ pub fn retrieve_asset(
client: &Client, client: &Client,
parent_url: &str, parent_url: &str,
url: &str, url: &str,
as_data_url: bool,
media_type: &str,
opt_silent: bool, opt_silent: bool,
) -> Result<(String, String), reqwest::Error> { ) -> Result<(Vec<u8>, String, String), reqwest::Error> {
if url.len() == 0 { if url.len() == 0 {
return Ok((str!(), str!())); // Provoke error
client.get("").send()?;
} }
if is_data_url(&url) { if is_data_url(&url) {
if as_data_url { let (media_type, data) = data_url_to_data(url);
Ok((url.to_string(), url.to_string())) Ok((data, url.to_string(), media_type))
} else {
let (_media_type, text) = data_url_to_text(url);
Ok((text, url.to_string()))
}
} else if is_file_url(&url) { } else if is_file_url(&url) {
// Check if parent_url is also file:/// // Check if parent_url is also file:///
// (if not, then we don't embed the asset) // (if not, then we don't embed the asset)
if !is_file_url(&parent_url) { if !is_file_url(&parent_url) {
return Ok((str!(), str!())); // Provoke error
client.get("").send()?;
} }
let fs_file_path: String = file_url_to_fs_path(url); let fs_file_path: String = file_url_to_fs_path(url);
let path = Path::new(&fs_file_path); let path = Path::new(&fs_file_path);
let url_fragment = get_url_fragment(url);
if path.exists() { if path.exists() {
if !opt_silent { if !opt_silent {
eprintln!("{}", &url); eprintln!("{}", &url);
} }
if as_data_url { Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!()))
let data_url: String = data_to_data_url(
&media_type,
&fs::read(&fs_file_path).unwrap(),
&fs_file_path,
&url_fragment,
);
Ok((data_url, url.to_string()))
} else { } else {
let data: String = fs::read_to_string(&fs_file_path).expect(url); // Provoke error
Ok((data, url.to_string())) Err(client.get("").send().unwrap_err())
}
} else {
Ok((str!(), url.to_string()))
} }
} else { } else {
let cache_key: String = clean_url(&url); let cache_key: String = clean_url(&url);
if cache.contains_key(&cache_key) { if cache.contains_key(&cache_key) {
// URL is in cache, we retrieve it // URL is in cache, we get and return it
let data = cache.get(&cache_key).unwrap();
if !opt_silent { if !opt_silent {
eprintln!("{} (from cache)", &url); eprintln!("{} (from cache)", &url);
} }
if as_data_url {
let url_fragment = get_url_fragment(url);
Ok(( Ok((
data_to_data_url(media_type, data, url, &url_fragment), cache.get(&cache_key).unwrap().to_vec(),
url.to_string(), url.to_string(),
str!(),
)) ))
} else { } else {
Ok((String::from_utf8_lossy(data).to_string(), url.to_string())) // URL not in cache, we retrieve the file
}
} else {
// URL not in cache, we request it
let mut response = client.get(url).send()?; let mut response = client.get(url).send()?;
let res_url = response.url().to_string(); let res_url = response.url().to_string();
@ -309,36 +283,21 @@ pub fn retrieve_asset(
let new_cache_key: String = clean_url(&res_url); let new_cache_key: String = clean_url(&res_url);
if as_data_url {
// Convert response into a byte array // Convert response into a byte array
let mut data: Vec<u8> = vec![]; let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?; response.copy_to(&mut data)?;
// Attempt to obtain media type by reading the Content-Type header // Attempt to obtain media type by reading the Content-Type header
let media_type = if media_type == "" { let media_type = response
response
.headers() .headers()
.get(CONTENT_TYPE) .get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok()) .and_then(|header| header.to_str().ok())
.unwrap_or(&media_type) .unwrap_or("");
} else {
media_type
};
let url_fragment = get_url_fragment(url);
let data_url = data_to_data_url(&media_type, &data, url, &url_fragment);
// Add to cache // Add to cache
cache.insert(new_cache_key, data); cache.insert(new_cache_key, data.clone());
Ok((data_url, res_url)) Ok((data, res_url, media_type.to_string()))
} else {
let content = response.text().unwrap();
// Add to cache
cache.insert(new_cache_key, content.as_bytes().to_vec());
Ok((content, res_url))
}
} }
} }
} }