Compare commits
245 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
7c61b462dd | ||
|
ef3684025b | ||
|
db7ee697b3 | ||
|
89ce5029b9 | ||
|
54609b10e5 | ||
|
013d93bacc | ||
|
0df8613789 | ||
|
68a1531a11 | ||
|
99c3be1804 | ||
|
80559e7224 | ||
|
c5c5f1ca44 | ||
|
de6a13a884 | ||
|
ef16355f9f | ||
|
a4dc0ed9b4 | ||
|
cd0e366979 | ||
|
d4c6c458f9 | ||
|
c9970b3a8e | ||
|
404d322b99 | ||
|
1b353d0b46 | ||
|
f920a5e4d6 | ||
|
d3ca1ecad3 | ||
|
9e057472c6 | ||
|
d453145bf8 | ||
|
8c131d649f | ||
|
a221fdb368 | ||
|
15dd82e300 | ||
|
de492caaa5 | ||
|
9096447c70 | ||
|
354340db86 | ||
|
900dd8d163 | ||
|
a11c4496b0 | ||
|
dd33b16876 | ||
|
2cc1870033 | ||
|
d41e6c041b | ||
|
460a461373 | ||
|
1e6e87b6aa | ||
|
54094270b3 | ||
|
e6cf367e23 | ||
|
e8437ecb28 | ||
|
543bebbd8d | ||
|
dc6c0200bc | ||
|
04bdb3072f | ||
|
a9228f0522 | ||
|
aae68c4c82 | ||
|
dd23826205 | ||
|
781f4cd3b5 | ||
|
6826b59ab9 | ||
|
2be725eeb5 | ||
|
dd2e9ca2e5 | ||
|
50bccae476 | ||
|
b3bcb1d85b | ||
|
c58d044459 | ||
|
eeaea0df16 | ||
|
2539aac4c0 | ||
|
03b9af543a | ||
|
1bb8141021 | ||
|
4bc8043f0f | ||
|
5effa38392 | ||
|
125aeeec3b | ||
|
c938ba6a2f | ||
|
f354affc36 | ||
|
7686b2ea64 | ||
|
b29b9a6a7c | ||
|
cbda57cfa8 | ||
|
b8aa545e8c | ||
|
22a031af5d | ||
|
6e6a60b305 | ||
|
77d6022d84 | ||
|
5db19d1a3e | ||
|
a6e891b3c5 | ||
|
d7a82a008b | ||
|
2369a4dd3c | ||
|
d27e53fb36 | ||
|
2cb51477d2 | ||
|
a308a20411 | ||
|
a6ddf1c13a | ||
|
8256d17efd | ||
|
efa12935ba | ||
|
7126a98023 | ||
|
c7ee3ec6e2 | ||
|
c4218031e2 | ||
|
6f918f6c1c | ||
|
6ecda080e8 | ||
|
2e86ee67a5 | ||
|
359616b901 | ||
|
ea2cdab330 | ||
|
4434823c46 | ||
|
e0a78ffc9d | ||
|
cbbb297473 | ||
|
98ddb821a5 | ||
|
be097b1d4e | ||
|
325688acf5 | ||
|
11207d49d2 | ||
|
96da64e193 | ||
|
8a62a51210 | ||
|
a6ac1df93d | ||
|
49e81149df | ||
|
a3516b2ae9 | ||
|
385301bf16 | ||
|
4921a70dda | ||
|
e0273c664a | ||
|
6d629bfd4a | ||
|
ae9d78a891 | ||
|
0f55fb3c49 | ||
|
e41fd6a1c6 | ||
|
eaf662bb3b | ||
|
fa71f6a42c | ||
|
9a27c6c5ee | ||
|
4ad07c0519 | ||
|
e78405f2ae | ||
|
e81462be41 | ||
|
b972d717ce | ||
|
edb679d2b3 | ||
|
2e1462a953 | ||
|
57883b84b2 | ||
|
4fa2eda983 | ||
|
028187a31e | ||
|
c469c30cbd | ||
|
6de36243f9 | ||
|
4f162d0cc0 | ||
|
95040173fc | ||
|
b10d41f82e | ||
|
4c2c55d166 | ||
|
2dd1c465e4 | ||
|
a5afda9c80 | ||
|
ab6fed6d1f | ||
|
f8dcb335e7 | ||
|
913051870a | ||
|
614a518475 | ||
|
870a4b150e | ||
|
0533b287b7 | ||
|
4ba4285b6b | ||
|
2b9caf9840 | ||
|
8adf059980 | ||
|
8ad252868e | ||
|
e145df372f | ||
|
816b6175ac | ||
|
d89b4d5f5b | ||
|
15d98a7269 | ||
|
36e82cb511 | ||
|
1b1befd7b0 | ||
|
a2f59b4418 | ||
|
124a62920f | ||
|
f557504bed | ||
|
5ac520b4da | ||
|
7a97291498 | ||
|
38a6f963ad | ||
|
052f8f49ec | ||
|
08de486382 | ||
|
c0e0a69773 | ||
|
1636540693 | ||
|
3e80cb02ce | ||
|
a296531b3f | ||
|
8462b6bc31 | ||
|
92f38556b6 | ||
|
c0bdeab2e3 | ||
|
5a502eab4b | ||
|
19f08265a2 | ||
|
1d6392cb28 | ||
|
03cdc0e0b2 | ||
|
b98b7af0b4 | ||
|
73c35eaccb | ||
|
2c5d1e930b | ||
|
90f7c3a0d0 | ||
|
c1fec5967d | ||
|
09d41d2cf1 | ||
|
8f1da3c792 | ||
|
a8449a2b32 | ||
|
164e728ad3 | ||
|
8883bd6aca | ||
|
eae5d4dc6b | ||
|
ec85121d28 | ||
|
a8a85a4191 | ||
|
decd5b2119 | ||
|
bef6d848e9 | ||
|
4263e42cd1 | ||
|
23de5ced21 | ||
|
bc98aca2a2 | ||
|
438ebd520a | ||
|
ddb97009e9 | ||
|
6e67545b92 | ||
|
9e5d8ec691 | ||
|
fb835fae28 | ||
|
29bf042da0 | ||
|
d67483cf8e | ||
|
4140d8ebad | ||
|
2ac964fae5 | ||
|
ae5d6d2df4 | ||
|
2ed151d883 | ||
|
3cdfdc45d3 | ||
|
ac04af2cfc | ||
|
769953d7bd | ||
|
136dcc31cf | ||
|
44cac65a83 | ||
|
c3ca2ad1d5 | ||
|
0347fd3985 | ||
|
95d0083b3c | ||
|
3ce26b5fdd | ||
|
7f9458adfe | ||
|
5c229c51da | ||
|
f6ea16b3ad | ||
|
877b11d52c | ||
|
f9aac6f41b | ||
|
0a30c286fe | ||
|
ea56b9b4c1 | ||
|
e821591efe | ||
|
4e5d2fdc8d | ||
|
7c2ed2c9ca | ||
|
60d21ae071 | ||
|
bfdcd459e1 | ||
|
6c020dfa88 | ||
|
9894213393 | ||
|
80523c5a59 | ||
|
65b5ff4ec0 | ||
|
4e31d0433e | ||
|
ed82b96152 | ||
|
f16a2a9ed5 | ||
|
38d7873d6e | ||
|
d848179a43 | ||
|
399f515eeb | ||
|
46616f327b | ||
|
090d647390 | ||
|
4fa88b7aba | ||
|
3d678d80ee | ||
|
19a87f426e | ||
|
cbe3f9f554 | ||
|
b6a44c64cf | ||
|
84e2dd789c | ||
|
ac4945ca97 | ||
|
2ca2c7aff8 | ||
|
a18df74946 | ||
|
2bc8414cc1 | ||
|
c4569343a4 | ||
|
5f5820c71a | ||
|
4719a6fecf | ||
|
c999359b9f | ||
|
f22e2b6e68 | ||
|
31a9550f5b | ||
|
201f2d61b9 | ||
|
3ae4dfae8e | ||
|
8fbae735fa | ||
|
05985583f0 | ||
|
651fa716b4 | ||
|
9be3982dc6 | ||
|
27c9fb4cd3 |
13
.github/workflows/build_gnu_linux.yml
vendored
13
.github/workflows/build_gnu_linux.yml
vendored
@ -3,6 +3,17 @@ name: GNU/Linux
|
|||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ master ]
|
branches: [ master ]
|
||||||
|
paths-ignore:
|
||||||
|
- 'assets/'
|
||||||
|
- 'dist/'
|
||||||
|
- 'docs/'
|
||||||
|
- 'snap/'
|
||||||
|
- '.adr-dir'
|
||||||
|
- 'Dockerfile'
|
||||||
|
- 'LICENSE'
|
||||||
|
- 'Makefile'
|
||||||
|
- 'monolith.nuspec'
|
||||||
|
- 'README.md'
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
@ -17,6 +28,8 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- run: git config --global core.autocrlf false
|
- run: git config --global core.autocrlf false
|
||||||
|
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: cargo build --all --locked --verbose
|
run: cargo build --all --locked --verbose
|
||||||
|
13
.github/workflows/build_macos.yml
vendored
13
.github/workflows/build_macos.yml
vendored
@ -3,6 +3,17 @@ name: macOS
|
|||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ master ]
|
branches: [ master ]
|
||||||
|
paths-ignore:
|
||||||
|
- 'assets/'
|
||||||
|
- 'dist/'
|
||||||
|
- 'docs/'
|
||||||
|
- 'snap/'
|
||||||
|
- '.adr-dir'
|
||||||
|
- 'Dockerfile'
|
||||||
|
- 'LICENSE'
|
||||||
|
- 'Makefile'
|
||||||
|
- 'monolith.nuspec'
|
||||||
|
- 'README.md'
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
@ -17,6 +28,8 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- run: git config --global core.autocrlf false
|
- run: git config --global core.autocrlf false
|
||||||
|
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: cargo build --all --locked --verbose
|
run: cargo build --all --locked --verbose
|
||||||
|
13
.github/workflows/build_windows.yml
vendored
13
.github/workflows/build_windows.yml
vendored
@ -3,6 +3,17 @@ name: Windows
|
|||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ master ]
|
branches: [ master ]
|
||||||
|
paths-ignore:
|
||||||
|
- 'assets/'
|
||||||
|
- 'dist/'
|
||||||
|
- 'docs/'
|
||||||
|
- 'snap/'
|
||||||
|
- '.adr-dir'
|
||||||
|
- 'Dockerfile'
|
||||||
|
- 'LICENSE'
|
||||||
|
- 'Makefile'
|
||||||
|
- 'monolith.nuspec'
|
||||||
|
- 'README.md'
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
@ -17,6 +28,8 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- run: git config --global core.autocrlf false
|
- run: git config --global core.autocrlf false
|
||||||
|
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: cargo build --all --locked --verbose
|
run: cargo build --all --locked --verbose
|
||||||
|
80
.github/workflows/cd.yml
vendored
80
.github/workflows/cd.yml
vendored
@ -1,4 +1,4 @@
|
|||||||
# CD GitHub Actions workflow for Monolith
|
# CD GitHub Actions workflow for monolith
|
||||||
|
|
||||||
name: CD
|
name: CD
|
||||||
|
|
||||||
@ -13,10 +13,13 @@ jobs:
|
|||||||
runs-on: windows-2019
|
runs-on: windows-2019
|
||||||
steps:
|
steps:
|
||||||
- run: git config --global core.autocrlf false
|
- run: git config --global core.autocrlf false
|
||||||
|
|
||||||
- name: Checkout the repository
|
- name: Checkout the repository
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@v2
|
||||||
- name: Build and install the executable
|
|
||||||
|
- name: Build the executable
|
||||||
run: cargo build --release
|
run: cargo build --release
|
||||||
|
|
||||||
- uses: Shopify/upload-to-release@1.0.0
|
- uses: Shopify/upload-to-release@1.0.0
|
||||||
with:
|
with:
|
||||||
name: monolith.exe
|
name: monolith.exe
|
||||||
@ -28,27 +31,78 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout the repository
|
- name: Checkout the repository
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Prepare cross-platform environment
|
- name: Prepare cross-platform environment
|
||||||
run: |
|
run: |
|
||||||
sudo mkdir -p /cross-build-arm
|
sudo mkdir /cross-build
|
||||||
sudo echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" >> /etc/apt/sources.list
|
sudo touch /etc/apt/sources.list.d/armhf.list
|
||||||
|
echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/armhf.list
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
|
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
|
||||||
sudo apt-get download libssl1.1:armhf libssl-dev:armhf
|
sudo apt-get download libssl1.1:armhf libssl-dev:armhf
|
||||||
sudo dpkg -x libssl1.1*.deb /cross-build-arm
|
sudo dpkg -x libssl1.1*.deb /cross-build
|
||||||
sudo dpkg -x libssl-dev*.deb /cross-build-arm
|
sudo dpkg -x libssl-dev*.deb /cross-build
|
||||||
rustup target add arm-unknown-linux-gnueabihf
|
rustup target add arm-unknown-linux-gnueabihf
|
||||||
echo "::set-env name=C_INCLUDE_PATH::/cross-build-arm/usr/include"
|
echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV
|
||||||
echo "::set-env name=OPENSSL_INCLUDE_DIR::/cross-build-arm/usr/include/arm-linux-gnueabihf"
|
echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/arm-linux-gnueabihf" >> $GITHUB_ENV
|
||||||
echo "::set-env name=OPENSSL_LIB_DIR::/cross-build-arm/usr/lib/arm-linux-gnueabihf"
|
echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
|
||||||
echo "::set-env name=PKG_CONFIG_ALLOW_CROSS::1"
|
echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
|
||||||
echo "::set-env name=RUSTFLAGS::-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build-arm/usr/lib/arm-linux-gnueabihf -L/cross-build-arm/lib/arm-linux-gnueabihf"
|
echo "RUSTFLAGS=-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build/usr/lib/arm-linux-gnueabihf -L/cross-build/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Build the executable
|
- name: Build the executable
|
||||||
run: |
|
run: cargo build --release --target=arm-unknown-linux-gnueabihf
|
||||||
cargo build --release --target=arm-unknown-linux-gnueabihf
|
|
||||||
- name: Attach artifact to the release
|
- name: Attach artifact to the release
|
||||||
uses: Shopify/upload-to-release@1.0.0
|
uses: Shopify/upload-to-release@1.0.0
|
||||||
with:
|
with:
|
||||||
name: monolith-gnu-linux-armhf
|
name: monolith-gnu-linux-armhf
|
||||||
path: target/arm-unknown-linux-gnueabihf/release/monolith
|
path: target/arm-unknown-linux-gnueabihf/release/monolith
|
||||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
gnu_linux_aarch64:
|
||||||
|
runs-on: ubuntu-18.04
|
||||||
|
steps:
|
||||||
|
- name: Checkout the repository
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Prepare cross-platform environment
|
||||||
|
run: |
|
||||||
|
sudo mkdir /cross-build
|
||||||
|
sudo touch /etc/apt/sources.list.d/arm64.list
|
||||||
|
echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/arm64.list
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y gcc-aarch64-linux-gnu libc6-arm64-cross libc6-dev-arm64-cross
|
||||||
|
sudo apt-get download libssl1.1:arm64 libssl-dev:arm64
|
||||||
|
sudo dpkg -x libssl1.1*.deb /cross-build
|
||||||
|
sudo dpkg -x libssl-dev*.deb /cross-build
|
||||||
|
rustup target add aarch64-unknown-linux-gnu
|
||||||
|
echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV
|
||||||
|
echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/aarch64-linux-gnu" >> $GITHUB_ENV
|
||||||
|
echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV
|
||||||
|
echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
|
||||||
|
echo "RUSTFLAGS=-C linker=aarch64-linux-gnu-gcc -L/usr/aarch64-linux-gnu/lib -L/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
- name: Build the executable
|
||||||
|
run: cargo build --release --target=aarch64-unknown-linux-gnu
|
||||||
|
|
||||||
|
- name: Attach artifact to the release
|
||||||
|
uses: Shopify/upload-to-release@1.0.0
|
||||||
|
with:
|
||||||
|
name: monolith-gnu-linux-aarch64
|
||||||
|
path: target/aarch64-unknown-linux-gnu/release/monolith
|
||||||
|
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
gnu_linux_x86_64:
|
||||||
|
runs-on: ubuntu-18.04
|
||||||
|
steps:
|
||||||
|
- name: Checkout the repository
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Build the executable
|
||||||
|
run: cargo build --release
|
||||||
|
|
||||||
|
- uses: Shopify/upload-to-release@1.0.0
|
||||||
|
with:
|
||||||
|
name: monolith-gnu-linux-x86_64
|
||||||
|
path: target/release/monolith
|
||||||
|
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
21
.github/workflows/ci.yml
vendored
21
.github/workflows/ci.yml
vendored
@ -1,8 +1,21 @@
|
|||||||
|
# CI GitHub Actions workflow for monolith
|
||||||
|
|
||||||
name: CI
|
name: CI
|
||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ master ]
|
branches: [ master ]
|
||||||
|
paths-ignore:
|
||||||
|
- 'assets/'
|
||||||
|
- 'dist/'
|
||||||
|
- 'docs/'
|
||||||
|
- 'snap/'
|
||||||
|
- '.adr-dir'
|
||||||
|
- 'Dockerfile'
|
||||||
|
- 'LICENSE'
|
||||||
|
- 'Makefile'
|
||||||
|
- 'monolith.nuspec'
|
||||||
|
- 'README.md'
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build_and_test:
|
build_and_test:
|
||||||
@ -21,10 +34,16 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- run: git config --global core.autocrlf false
|
- run: git config --global core.autocrlf false
|
||||||
|
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: cargo build --all --locked --verbose
|
run: cargo build --all --locked --verbose
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: cargo test --all --locked --verbose
|
run: cargo test --all --locked --verbose
|
||||||
|
|
||||||
- name: Check code formatting
|
- name: Check code formatting
|
||||||
run: cargo fmt --all -- --check
|
run: |
|
||||||
|
rustup component add rustfmt
|
||||||
|
cargo fmt --all -- --check
|
||||||
|
1880
Cargo.lock
generated
1880
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
40
Cargo.toml
40
Cargo.toml
@ -1,7 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "monolith"
|
name = "monolith"
|
||||||
version = "2.2.4"
|
version = "2.6.2"
|
||||||
edition = "2018"
|
|
||||||
authors = [
|
authors = [
|
||||||
"Sunshine <sunshine@uberspace.net>",
|
"Sunshine <sunshine@uberspace.net>",
|
||||||
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
|
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
|
||||||
@ -9,20 +8,41 @@ authors = [
|
|||||||
"Emi Simpson <emi@alchemi.dev>",
|
"Emi Simpson <emi@alchemi.dev>",
|
||||||
"rhysd <lin90162@yahoo.co.jp>",
|
"rhysd <lin90162@yahoo.co.jp>",
|
||||||
]
|
]
|
||||||
|
edition = "2018"
|
||||||
description = "CLI tool for saving web pages as a single HTML file"
|
description = "CLI tool for saving web pages as a single HTML file"
|
||||||
|
homepage = "https://github.com/Y2Z/monolith"
|
||||||
|
repository = "https://github.com/Y2Z/monolith"
|
||||||
|
readme = "README.md"
|
||||||
|
keywords = ["web", "http", "html", "download", "command-line"]
|
||||||
|
categories = ["command-line-utilities", "web-programming"]
|
||||||
|
include = [
|
||||||
|
"src/*.rs",
|
||||||
|
"Cargo.toml",
|
||||||
|
]
|
||||||
|
license = "CC0-1.0"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
base64 = "0.12.0"
|
atty = "0.2.14" # Used for highlighting network errors
|
||||||
clap = "2.33.0"
|
base64 = "0.13.0" # Used for integrity attributes
|
||||||
cssparser = "0.27.2"
|
chrono = "0.4.20" # Used for formatting creation timestamp
|
||||||
|
clap = "3.2.16"
|
||||||
|
cssparser = "0.29.6"
|
||||||
|
encoding_rs = "0.8.31"
|
||||||
html5ever = "0.24.1"
|
html5ever = "0.24.1"
|
||||||
url = "2.1.1"
|
percent-encoding = "2.1.0"
|
||||||
|
sha2 = "0.10.2" # Used for calculating checksums during integrity checks
|
||||||
|
url = "2.2.2"
|
||||||
|
|
||||||
|
# Used for parsing srcset and NOSCRIPT
|
||||||
|
[dependencies.regex]
|
||||||
|
version = "1.6.0"
|
||||||
|
default-features = false
|
||||||
|
features = ["std", "perf-dfa", "unicode-perl"]
|
||||||
|
|
||||||
[dependencies.reqwest]
|
[dependencies.reqwest]
|
||||||
version = "0.10.*"
|
version = "0.11.11"
|
||||||
default-features = false
|
default-features = false
|
||||||
features = ["default-tls", "blocking", "gzip"]
|
features = ["default-tls", "blocking", "gzip", "brotli", "deflate"]
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
assert_cmd = "0.12.0"
|
assert_cmd = "2.0.4"
|
||||||
tempfile = "3.1.0"
|
|
||||||
|
28
Dockerfile
28
Dockerfile
@ -1,18 +1,22 @@
|
|||||||
FROM rust
|
FROM ekidd/rust-musl-builder as builder
|
||||||
|
|
||||||
WORKDIR /usr/local/src/
|
|
||||||
RUN curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
|
|
||||||
| grep "tarball_url.*\"," \
|
|
||||||
| cut -d '"' -f 4 \
|
|
||||||
| wget -qi - -O monolith.tar.gz
|
|
||||||
|
|
||||||
|
RUN curl -L -o monolith.tar.gz $(curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
|
||||||
|
| grep "tarball_url.*\"," \
|
||||||
|
| cut -d '"' -f 4)
|
||||||
RUN tar xfz monolith.tar.gz \
|
RUN tar xfz monolith.tar.gz \
|
||||||
&& mv Y2Z-monolith-* monolith \
|
&& mv Y2Z-monolith-* monolith \
|
||||||
&& rm monolith.tar.gz
|
&& rm monolith.tar.gz
|
||||||
|
|
||||||
WORKDIR /usr/local/src/monolith
|
WORKDIR monolith/
|
||||||
RUN ls -a
|
|
||||||
RUN make install
|
RUN make install
|
||||||
|
|
||||||
|
|
||||||
|
FROM alpine
|
||||||
|
|
||||||
|
RUN apk update && \
|
||||||
|
apk add --no-cache openssl && \
|
||||||
|
rm -rf "/var/cache/apk/*"
|
||||||
|
|
||||||
|
COPY --from=builder /home/rust/.cargo/bin/monolith /usr/bin/monolith
|
||||||
WORKDIR /tmp
|
WORKDIR /tmp
|
||||||
CMD ["/usr/local/cargo/bin/monolith"]
|
ENTRYPOINT ["/usr/bin/monolith"]
|
||||||
|
137
LICENSE
137
LICENSE
@ -1,24 +1,121 @@
|
|||||||
This is free and unencumbered software released into the public domain.
|
Creative Commons Legal Code
|
||||||
|
|
||||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
CC0 1.0 Universal
|
||||||
distribute this software, either in source code form or as a compiled
|
|
||||||
binary, for any purpose, commercial or non-commercial, and by any
|
|
||||||
means.
|
|
||||||
|
|
||||||
In jurisdictions that recognize copyright laws, the author or authors
|
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
|
||||||
of this software dedicate any and all copyright interest in the
|
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
|
||||||
software to the public domain. We make this dedication for the benefit
|
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
|
||||||
of the public at large and to the detriment of our heirs and
|
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
|
||||||
successors. We intend this dedication to be an overt act of
|
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
|
||||||
relinquishment in perpetuity of all present and future rights to this
|
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
|
||||||
software under copyright law.
|
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
|
||||||
|
HEREUNDER.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
Statement of Purpose
|
||||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
||||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
||||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
||||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
||||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
For more information, please refer to <http://unlicense.org>
|
The laws of most jurisdictions throughout the world automatically confer
|
||||||
|
exclusive Copyright and Related Rights (defined below) upon the creator
|
||||||
|
and subsequent owner(s) (each and all, an "owner") of an original work of
|
||||||
|
authorship and/or a database (each, a "Work").
|
||||||
|
|
||||||
|
Certain owners wish to permanently relinquish those rights to a Work for
|
||||||
|
the purpose of contributing to a commons of creative, cultural and
|
||||||
|
scientific works ("Commons") that the public can reliably and without fear
|
||||||
|
of later claims of infringement build upon, modify, incorporate in other
|
||||||
|
works, reuse and redistribute as freely as possible in any form whatsoever
|
||||||
|
and for any purposes, including without limitation commercial purposes.
|
||||||
|
These owners may contribute to the Commons to promote the ideal of a free
|
||||||
|
culture and the further production of creative, cultural and scientific
|
||||||
|
works, or to gain reputation or greater distribution for their Work in
|
||||||
|
part through the use and efforts of others.
|
||||||
|
|
||||||
|
For these and/or other purposes and motivations, and without any
|
||||||
|
expectation of additional consideration or compensation, the person
|
||||||
|
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
|
||||||
|
is an owner of Copyright and Related Rights in the Work, voluntarily
|
||||||
|
elects to apply CC0 to the Work and publicly distribute the Work under its
|
||||||
|
terms, with knowledge of his or her Copyright and Related Rights in the
|
||||||
|
Work and the meaning and intended legal effect of CC0 on those rights.
|
||||||
|
|
||||||
|
1. Copyright and Related Rights. A Work made available under CC0 may be
|
||||||
|
protected by copyright and related or neighboring rights ("Copyright and
|
||||||
|
Related Rights"). Copyright and Related Rights include, but are not
|
||||||
|
limited to, the following:
|
||||||
|
|
||||||
|
i. the right to reproduce, adapt, distribute, perform, display,
|
||||||
|
communicate, and translate a Work;
|
||||||
|
ii. moral rights retained by the original author(s) and/or performer(s);
|
||||||
|
iii. publicity and privacy rights pertaining to a person's image or
|
||||||
|
likeness depicted in a Work;
|
||||||
|
iv. rights protecting against unfair competition in regards to a Work,
|
||||||
|
subject to the limitations in paragraph 4(a), below;
|
||||||
|
v. rights protecting the extraction, dissemination, use and reuse of data
|
||||||
|
in a Work;
|
||||||
|
vi. database rights (such as those arising under Directive 96/9/EC of the
|
||||||
|
European Parliament and of the Council of 11 March 1996 on the legal
|
||||||
|
protection of databases, and under any national implementation
|
||||||
|
thereof, including any amended or successor version of such
|
||||||
|
directive); and
|
||||||
|
vii. other similar, equivalent or corresponding rights throughout the
|
||||||
|
world based on applicable law or treaty, and any national
|
||||||
|
implementations thereof.
|
||||||
|
|
||||||
|
2. Waiver. To the greatest extent permitted by, but not in contravention
|
||||||
|
of, applicable law, Affirmer hereby overtly, fully, permanently,
|
||||||
|
irrevocably and unconditionally waives, abandons, and surrenders all of
|
||||||
|
Affirmer's Copyright and Related Rights and associated claims and causes
|
||||||
|
of action, whether now known or unknown (including existing as well as
|
||||||
|
future claims and causes of action), in the Work (i) in all territories
|
||||||
|
worldwide, (ii) for the maximum duration provided by applicable law or
|
||||||
|
treaty (including future time extensions), (iii) in any current or future
|
||||||
|
medium and for any number of copies, and (iv) for any purpose whatsoever,
|
||||||
|
including without limitation commercial, advertising or promotional
|
||||||
|
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
|
||||||
|
member of the public at large and to the detriment of Affirmer's heirs and
|
||||||
|
successors, fully intending that such Waiver shall not be subject to
|
||||||
|
revocation, rescission, cancellation, termination, or any other legal or
|
||||||
|
equitable action to disrupt the quiet enjoyment of the Work by the public
|
||||||
|
as contemplated by Affirmer's express Statement of Purpose.
|
||||||
|
|
||||||
|
3. Public License Fallback. Should any part of the Waiver for any reason
|
||||||
|
be judged legally invalid or ineffective under applicable law, then the
|
||||||
|
Waiver shall be preserved to the maximum extent permitted taking into
|
||||||
|
account Affirmer's express Statement of Purpose. In addition, to the
|
||||||
|
extent the Waiver is so judged Affirmer hereby grants to each affected
|
||||||
|
person a royalty-free, non transferable, non sublicensable, non exclusive,
|
||||||
|
irrevocable and unconditional license to exercise Affirmer's Copyright and
|
||||||
|
Related Rights in the Work (i) in all territories worldwide, (ii) for the
|
||||||
|
maximum duration provided by applicable law or treaty (including future
|
||||||
|
time extensions), (iii) in any current or future medium and for any number
|
||||||
|
of copies, and (iv) for any purpose whatsoever, including without
|
||||||
|
limitation commercial, advertising or promotional purposes (the
|
||||||
|
"License"). The License shall be deemed effective as of the date CC0 was
|
||||||
|
applied by Affirmer to the Work. Should any part of the License for any
|
||||||
|
reason be judged legally invalid or ineffective under applicable law, such
|
||||||
|
partial invalidity or ineffectiveness shall not invalidate the remainder
|
||||||
|
of the License, and in such case Affirmer hereby affirms that he or she
|
||||||
|
will not (i) exercise any of his or her remaining Copyright and Related
|
||||||
|
Rights in the Work or (ii) assert any associated claims and causes of
|
||||||
|
action with respect to the Work, in either case contrary to Affirmer's
|
||||||
|
express Statement of Purpose.
|
||||||
|
|
||||||
|
4. Limitations and Disclaimers.
|
||||||
|
|
||||||
|
a. No trademark or patent rights held by Affirmer are waived, abandoned,
|
||||||
|
surrendered, licensed or otherwise affected by this document.
|
||||||
|
b. Affirmer offers the Work as-is and makes no representations or
|
||||||
|
warranties of any kind concerning the Work, express, implied,
|
||||||
|
statutory or otherwise, including without limitation warranties of
|
||||||
|
title, merchantability, fitness for a particular purpose, non
|
||||||
|
infringement, or the absence of latent or other defects, accuracy, or
|
||||||
|
the present or absence of errors, whether or not discoverable, all to
|
||||||
|
the greatest extent permissible under applicable law.
|
||||||
|
c. Affirmer disclaims responsibility for clearing rights of other persons
|
||||||
|
that may apply to the Work or any use thereof, including without
|
||||||
|
limitation any person's Copyright and Related Rights in the Work.
|
||||||
|
Further, Affirmer disclaims responsibility for obtaining any necessary
|
||||||
|
consents, permissions or other rights required for any use of the
|
||||||
|
Work.
|
||||||
|
d. Affirmer understands and acknowledges that Creative Commons is not a
|
||||||
|
party to this document and has no duty or obligation with respect to
|
||||||
|
this CC0 or use of the Work.
|
||||||
|
6
Makefile
6
Makefile
@ -10,7 +10,7 @@ build:
|
|||||||
test: build
|
test: build
|
||||||
@cargo test --locked
|
@cargo test --locked
|
||||||
@cargo fmt --all -- --check
|
@cargo fmt --all -- --check
|
||||||
.PHONY: test_code_formatting
|
.PHONY: test
|
||||||
|
|
||||||
lint:
|
lint:
|
||||||
@cargo fmt --all --
|
@cargo fmt --all --
|
||||||
@ -23,3 +23,7 @@ install:
|
|||||||
uninstall:
|
uninstall:
|
||||||
@cargo uninstall
|
@cargo uninstall
|
||||||
.PHONY: uninstall
|
.PHONY: uninstall
|
||||||
|
|
||||||
|
clean:
|
||||||
|
@cargo clean
|
||||||
|
.PHONY: clean
|
||||||
|
150
README.md
150
README.md
@ -1,15 +1,15 @@
|
|||||||
[![Monolith Build Status for GNU/Linux](https://github.com/Y2Z/monolith/workflows/GNU%2FLinux/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux)
|
[![monolith build status on GNU/Linux](https://github.com/Y2Z/monolith/workflows/GNU%2FLinux/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux)
|
||||||
[![Monolith Build Status for macOS](https://github.com/Y2Z/monolith/workflows/macOS/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS)
|
[![monolith build status on macOS](https://github.com/Y2Z/monolith/workflows/macOS/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS)
|
||||||
[![Monolith Build Status for Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
|
[![monolith build status on Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
|
||||||
|
|
||||||
```
|
```
|
||||||
___ ___________ __________ ___________________ ___
|
_____ ______________ __________ ___________________ ___
|
||||||
| \ / \ | | | | | |
|
| \ / \ | | | | | |
|
||||||
| \_/ __ \_| __ | | ___ ___ |__| |
|
| \_/ __ \_| __ | | ___ ___ |__| |
|
||||||
| | | | | | | | | | | |
|
| | | | | | | | | | | |
|
||||||
| |__| _ |__| |____| | | | | __ |
|
| |\ /| |__| _ |__| |____| | | | | __ |
|
||||||
| |\_/| | \ | | | | | | |
|
| | \___/ | | \ | | | | | | |
|
||||||
|___| |__________| \____________________| |___| |___| |___|
|
|___| |__________| \_____________________| |___| |___| |___|
|
||||||
```
|
```
|
||||||
|
|
||||||
A data hoarder’s dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
|
A data hoarder’s dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
|
||||||
@ -18,70 +18,158 @@ Unlike the conventional “Save page as”, `monolith` not only saves the target
|
|||||||
|
|
||||||
If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
|
If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------
|
---------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
#### Via Homebrew (on macOS and GNU/Linux)
|
#### Using [Cargo](https://crates.io/crates/monolith)
|
||||||
$ brew install monolith
|
|
||||||
|
|
||||||
#### Using Snapcraft (on GNU/Linux)
|
```console
|
||||||
$ snap install monolith
|
cargo install monolith
|
||||||
|
```
|
||||||
|
|
||||||
#### From source
|
#### Via [Homebrew](https://formulae.brew.sh/formula/monolith) (macOS and GNU/Linux)
|
||||||
|
|
||||||
Dependency: `libssl-dev`
|
```console
|
||||||
|
brew install monolith
|
||||||
|
```
|
||||||
|
|
||||||
$ git clone https://github.com/Y2Z/monolith.git
|
#### Via [MacPorts](https://ports.macports.org/port/monolith/summary) (macOS)
|
||||||
$ cd monolith
|
|
||||||
$ make install
|
```console
|
||||||
|
sudo port install monolith
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Using [Snapcraft](https://snapcraft.io/monolith) (GNU/Linux)
|
||||||
|
|
||||||
|
```console
|
||||||
|
snap install monolith
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Using [FreeBSD packages](https://svnweb.freebsd.org/ports/head/www/monolith/) (FreeBSD)
|
||||||
|
|
||||||
|
```console
|
||||||
|
pkg install monolith
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Using [FreeBSD ports](https://www.freshports.org/www/monolith/) (FreeBSD)
|
||||||
|
|
||||||
|
```console
|
||||||
|
cd /usr/ports/www/monolith/
|
||||||
|
make install clean
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Using [pkgsrc](https://pkgsrc.se/www/monolith) (NetBSD, OpenBSD, Haiku, etc)
|
||||||
|
|
||||||
|
```console
|
||||||
|
cd /usr/pkgsrc/www/monolith
|
||||||
|
make install clean
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Using [containers](https://www.docker.com/)
|
||||||
|
|
||||||
|
```console
|
||||||
|
docker build -t Y2Z/monolith .
|
||||||
|
sudo install -b dist/run-in-container.sh /usr/local/bin/monolith
|
||||||
|
```
|
||||||
|
|
||||||
|
#### From [source](https://github.com/Y2Z/monolith)
|
||||||
|
|
||||||
|
Dependency: `libssl`
|
||||||
|
|
||||||
|
```console
|
||||||
|
git clone https://github.com/Y2Z/monolith.git
|
||||||
|
cd monolith
|
||||||
|
make install
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Using [pre-built binaries](https://github.com/Y2Z/monolith/releases) (Windows, ARM-based devices, etc)
|
||||||
|
|
||||||
|
Every release contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standard CPU architecture.
|
||||||
|
|
||||||
#### With Docker
|
|
||||||
The guide can be found [here](docs/containers.md)
|
|
||||||
|
|
||||||
---------------------------------------------------
|
---------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
$ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
|
|
||||||
|
```console
|
||||||
|
monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
|
||||||
|
```
|
||||||
|
|
||||||
|
```console
|
||||||
|
cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------
|
---------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
## Options
|
## Options
|
||||||
- `-c`: Ignore styles
|
|
||||||
- `-f`: Exclude frames and iframes
|
- `-a`: Exclude audio sources
|
||||||
- `-F`: Omit web fonts
|
- `-b`: Use custom `base URL`
|
||||||
|
- `-c`: Exclude CSS
|
||||||
|
- `-C`: Save document using custom `charset`
|
||||||
|
- `-d`: Allow retrieving assets only from specified `domain(s)`
|
||||||
|
- `-e`: Ignore network errors
|
||||||
|
- `-E`: Avoid retrieving assets located within specified domains
|
||||||
|
- `-f`: Omit frames
|
||||||
|
- `-F`: Exclude web fonts
|
||||||
- `-i`: Remove images
|
- `-i`: Remove images
|
||||||
- `-I`: Isolate the document
|
- `-I`: Isolate the document
|
||||||
- `-j`: Exclude JavaScript
|
- `-j`: Exclude JavaScript
|
||||||
- `-k`: Accept invalid X.509 (TLS) certificates
|
- `-k`: Accept invalid X.509 (TLS) certificates
|
||||||
- `-o`: Write output to file
|
- `-M`: Don't add timestamp and URL information
|
||||||
- `-s`: Silent mode
|
- `-n`: Extract contents of NOSCRIPT elements
|
||||||
- `-t`: Set custom network request timeout
|
- `-o`: Write output to `file` (use “-” for STDOUT)
|
||||||
- `-u`: Provide own User-Agent
|
- `-s`: Be quiet
|
||||||
|
- `-t`: Adjust `network request timeout`
|
||||||
|
- `-u`: Provide custom `User-Agent`
|
||||||
|
- `-v`: Exclude videos
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------
|
---------------------------------------------------
|
||||||
|
|
||||||
## HTTPS and HTTP proxies
|
|
||||||
|
## Proxies
|
||||||
|
|
||||||
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
|
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------
|
---------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
Please open an issue if something is wrong, that helps make this project better.
|
Please open an issue if something is wrong, that helps make this project better.
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------
|
---------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
## Related projects
|
## Related projects
|
||||||
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
|
|
||||||
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
|
- Monolith Chrome Extension: https://github.com/rhysd/monolith-of-web
|
||||||
- `Personal WayBack Machine`: https://github.com/popey/pwbm
|
- Pagesaver: https://github.com/distributed-mind/pagesaver
|
||||||
|
- Personal WayBack Machine: https://github.com/popey/pwbm
|
||||||
|
- Hako: https://github.com/dmpop/hako
|
||||||
|
- Monk: https://github.com/monk-dev/monk
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------
|
---------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
## License
|
## License
|
||||||
The Unlicense
|
|
||||||
|
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
|
||||||
|
This software is distributed without any warranty.
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------
|
---------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
<!-- Microtext -->
|
<!-- Microtext -->
|
||||||
<sub>Keep in mind that `monolith` is not aware of your browser’s session</sub>
|
<sub>Keep in mind that `monolith` is not aware of your browser’s session</sub>
|
||||||
|
BIN
assets/icon/icon.blend
Normal file
BIN
assets/icon/icon.blend
Normal file
Binary file not shown.
BIN
assets/icon/icon.png
Normal file
BIN
assets/icon/icon.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.2 MiB |
10
dist/run-in-container.sh
vendored
Normal file
10
dist/run-in-container.sh
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
DOCKER=docker
|
||||||
|
PROG_NAME=monolith
|
||||||
|
|
||||||
|
if which podman 2>&1 > /dev/null; then
|
||||||
|
DOCKER=podman
|
||||||
|
fi
|
||||||
|
|
||||||
|
$DOCKER run --rm Y2Z/$PROG_NAME "$@"
|
@ -8,12 +8,12 @@ Accepted
|
|||||||
|
|
||||||
## Context
|
## Context
|
||||||
|
|
||||||
HTML pages sometimes contain NOSCRIPT nodes, which reveal their contents only in case when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational, however sometimes can also feature media assets or even iframes.
|
HTML pages can contain `noscript` nodes, which reveal their contents only in case when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational, however sometimes can also feature media assets or even iframes.
|
||||||
|
|
||||||
## Decision
|
## Decision
|
||||||
|
|
||||||
When the document is being saved with or without JavaScript, each NOSCRIPT node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all NOSCRIPT nodes in order to make their contents always visible in the document, complimenting the "disable JS" function of the program.
|
When the document is being saved with or without JavaScript, each `noscript` node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all `noscript` nodes in order to make their contents always visible in the document, complementing the "disable JS" function of the program.
|
||||||
|
|
||||||
## Consequences
|
## Consequences
|
||||||
|
|
||||||
Saved documents will have contents of all NOSCRIPT nodes processed as if they are part of the document's DOM, therefore properly display images encapsulated within NOSCRIPT nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" NOSCRIPT elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.
|
Saved documents will have contents of all `noscript` nodes processed as if they are part of the document's DOM, therefore properly display images encapsulated within `noscript` nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" `noscript` elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
# 2. Network request timeout
|
# 3. Network request timeout
|
||||||
|
|
||||||
Date: 2020-02-15
|
Date: 2020-02-15
|
||||||
|
|
||||||
@ -12,14 +12,10 @@ A slow network connection and overloaded server may negatively impact network re
|
|||||||
|
|
||||||
## Decision
|
## Decision
|
||||||
|
|
||||||
Make the program simulate behavior of popular web browsers and CLI tools, where
|
Make the program simulate behavior of popular web browsers and CLI tools, where the default network response timeout is most often set to 120 seconds.
|
||||||
the default network response timeout is most often set to 120 seconds.
|
|
||||||
|
|
||||||
Instead of featuring retries for timed out network requests, the program
|
Instead of featuring retries for timed out network requests, the program should have an option to adjust the timeout length, along with making it indefinite when given "0" as its value.
|
||||||
should have an option to adjust the timeout length, along with making it
|
|
||||||
indefinite when given "0" as its value.
|
|
||||||
|
|
||||||
## Consequences
|
## Consequences
|
||||||
|
|
||||||
The user is able to retrieve resources that have long response time, as well as obtain
|
The user is able to retrieve resources that have long response time, as well as obtain full control over how soon, and if at all, network requests should time out.
|
||||||
full control over how soon, and if at all, network requests should time out.
|
|
||||||
|
21
docs/arch/0004-asset-integrity-check.md
Normal file
21
docs/arch/0004-asset-integrity-check.md
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# 4. Asset integrity check
|
||||||
|
|
||||||
|
Date: 2020-02-23
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Accepted
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
In HTML5, `link` and `script` nodes have an attribute named `integrity`, which lets the browser check if the remote file is valid, mostly for the purpose of enhancing page security.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
In order to replicate the browser's behavior, the program should perform the integrity check the same way the browser does, excluding the linked asset from the final result if the check fails.
|
||||||
|
|
||||||
|
The `integrity` attribute should be removed from nodes, as it bears no benefit for resources embedded as data URLs.
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
Assets that fail to pass the check get excluded from the saved document. Meanwhile, saved documents no longer contain `integrity` attributes on any `link` or `script` nodes.
|
@ -1,4 +1,4 @@
|
|||||||
# 4. Asset Minimization
|
# 5. Asset Minimization
|
||||||
|
|
||||||
Date: 2020-03-14
|
Date: 2020-03-14
|
||||||
|
|
||||||
@ -8,18 +8,12 @@ Accepted
|
|||||||
|
|
||||||
## Context
|
## Context
|
||||||
|
|
||||||
It may look like a good idea to make monolith compress retrieved assets while
|
It may look like a good idea to make monolith compress retrieved assets while saving the page for the purpose of reducing the resulting document's file size.
|
||||||
saving the page for the purpose of reducing the resulting document's file size.
|
|
||||||
|
|
||||||
## Decision
|
## Decision
|
||||||
|
|
||||||
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool,
|
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool, aside from being able to tell monolith to exclude certain types of assets (e.g. images, CSS, JavaScript), it would be outside the scope of this program to implement code for compressing assets. Minimizing files before embedding them does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages saved by monolith, if needed.
|
||||||
aside from being able to tell monolith to exclude certain types of asests (e.g. images, CSS, JavaScript),
|
|
||||||
it would be outside of scope of this program to implement code for compressing assets. Minimizing files before embedding them
|
|
||||||
does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages
|
|
||||||
saved by monolith, if needed.
|
|
||||||
|
|
||||||
## Consequences
|
## Consequences
|
||||||
|
|
||||||
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal
|
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.
|
||||||
amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.
|
|
||||||
|
19
docs/arch/0006-reload-and-location-meta-tags.md
Normal file
19
docs/arch/0006-reload-and-location-meta-tags.md
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# 6. Reload and location `meta` tags
|
||||||
|
|
||||||
|
Date: 2020-06-25
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Accepted
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` tags that have `http-equiv` attribute equal to "Refresh" or "Location", in order to prevent them from reloading the page or redirecting to another location.
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.
|
19
docs/arch/0007-network-errors.md
Normal file
19
docs/arch/0007-network-errors.md
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# 7. Network errors
|
||||||
|
|
||||||
|
Date: 2020-11-22
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Accepted
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
Servers may return information with HTTP response codes other than `200`, however those responses may still contain useful data.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
Fail by default, notifying of the network error. Add option to continue retrieving assets by treating all response codes as `200`.
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
Monolith will fail to obtain resources with status other than `200`, unless told to ignore network errors.
|
40
docs/arch/0008-base-tag.md
Normal file
40
docs/arch/0008-base-tag.md
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
# 8. Base Tag
|
||||||
|
|
||||||
|
Date: 2020-12-25
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Accepted
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
HTML documents may contain `base` tag, which influences resolution of anchor links and relative URLs as well as dynamically loaded resources.
|
||||||
|
|
||||||
|
Sometimes, in order to make certain saved documents function closer to how they operate while being served from a remote server, the `base` tag specifying the source page's URL may need to be added to the document.
|
||||||
|
|
||||||
|
There can be only one such tag. If multiple `base` tags are present, only the first encountered tag ends up being used.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
Adding the `base` tag should be optional — saved documents should not contain the `base` tag unless it was specified by the user, or the document originally had the `base` tag in it.
|
||||||
|
|
||||||
|
Existing `href` attribute's value of the original `base` tag should be used for resolving the document's relative links instead of document's own URL (precisely the way browsers do it).
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
#### If the base tag does not exist in the source document
|
||||||
|
|
||||||
|
- If the base tag does not exist in the source document
|
||||||
|
- With base URL option provided
|
||||||
|
- use the specified base URL value to retrieve assets, keep original base URL value in the document
|
||||||
|
- Without base URL option provided
|
||||||
|
- download document as usual, do not add base tag
|
||||||
|
- If the base tag already exists in the source document
|
||||||
|
- With base URL option provided
|
||||||
|
- we overwrite the original base URL before retrieving assets, keep new base URL value in the document
|
||||||
|
- Without base URL option provided:
|
||||||
|
- use the base URL from the original document to retrieve assets, keep original base URL value in the document
|
||||||
|
|
||||||
|
The program will obtain the ability to retrieve remote assets for non-remote sources (such as data URLs and local files).
|
||||||
|
|
||||||
|
The program will obtain the ability to get rid of existing base tag values (by providing an empty one).
|
@ -1,15 +0,0 @@
|
|||||||
1. Run `docker build -t y2z/monolith .` to create a Docker image
|
|
||||||
|
|
||||||
2. Create a file named `monolith` which contains:
|
|
||||||
```sh
|
|
||||||
#!/bin/sh
|
|
||||||
|
|
||||||
docker run --rm \
|
|
||||||
y2z/monolith \
|
|
||||||
monolith \
|
|
||||||
"$@"
|
|
||||||
```
|
|
||||||
3. Make the file executable (`chmod +x monolith`) and include it into your `$PATH`
|
|
||||||
4. Now you should be able to run a containerized build of monolith like this:
|
|
||||||
`monolith -I https://github.com > document.html`
|
|
||||||
|
|
3
docs/references.md
Normal file
3
docs/references.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# References
|
||||||
|
|
||||||
|
- https://content-security-policy.com/
|
23
docs/web-apps.md
Normal file
23
docs/web-apps.md
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Web apps that can be saved with Monolith
|
||||||
|
|
||||||
|
These apps retain all or most of their functionality when saved with Monolith:
|
||||||
|
|
||||||
|
|
||||||
|
## Converse
|
||||||
|
|
||||||
|
| Website | https://conversejs.org |
|
||||||
|
|:-----------------------|:--------------------------------------------------------------------|
|
||||||
|
| Description | An XMPP client built using web technologies |
|
||||||
|
| Functionality retained | **full** |
|
||||||
|
| Command to use | `monolith https://conversejs.org/fullscreen.html > conversejs.html` |
|
||||||
|
| Monolith version used | 2.2.7 |
|
||||||
|
|
||||||
|
|
||||||
|
## Markdown Tables generator
|
||||||
|
|
||||||
|
| Website | https://www.tablesgenerator.com |
|
||||||
|
|:--------------------------|:-----------------------------------------------------------------------------------------------|
|
||||||
|
| Description | Tool for creating tables in extended Markdown format |
|
||||||
|
| Functionality retained | **full** |
|
||||||
|
| Command to use | `monolith -I https://www.tablesgenerator.com/markdown_tables -o markdown-table-generator.html` |
|
||||||
|
| Monolith version used | 2.6.1 |
|
25
monolith.nuspec
Normal file
25
monolith.nuspec
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<package xmlns="http://schemas.microsoft.com/packaging/2015/06/nuspec.xsd">
|
||||||
|
<metadata>
|
||||||
|
<id>monolith</id>
|
||||||
|
<version>2.4.0</version>
|
||||||
|
<title>Monolith</title>
|
||||||
|
<authors>Sunshine, Mahdi Robatipoor, Emmanuel Delaborde, Emi Simpson, rhysd</authors>
|
||||||
|
<projectUrl>https://github.com/Y2Z/monolith</projectUrl>
|
||||||
|
<iconUrl>https://raw.githubusercontent.com/Y2Z/monolith/master/assets/icon/icon.png</iconUrl>
|
||||||
|
<licenseUrl>https://raw.githubusercontent.com/Y2Z/monolith/master/LICENSE</licenseUrl>
|
||||||
|
<requireLicenseAcceptance>false</requireLicenseAcceptance>
|
||||||
|
<description>CLI tool for saving complete web pages as a single HTML file
|
||||||
|
|
||||||
|
A data hoarder’s dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
|
||||||
|
|
||||||
|
Unlike the conventional “Save page as”, monolith not only saves the target document, it embeds CSS, image, and JavaScript assets all at once, producing a single HTML5 document that is a joy to store and share.
|
||||||
|
|
||||||
|
If compared to saving websites using wget, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
|
||||||
|
</description>
|
||||||
|
<copyright>Public Domain</copyright>
|
||||||
|
<language>en-US</language>
|
||||||
|
<tags>scraping archiving</tags>
|
||||||
|
<docsUrl>https://github.com/Y2Z/monolith/blob/master/README.md</docsUrl>
|
||||||
|
</metadata>
|
||||||
|
</package>
|
@ -18,11 +18,11 @@ description: |
|
|||||||
|
|
||||||
confinement: strict
|
confinement: strict
|
||||||
|
|
||||||
# Building on armhf fails, so we specify all supported non-armhf architectures
|
|
||||||
architectures:
|
architectures:
|
||||||
- build-on: amd64
|
- build-on: amd64
|
||||||
- build-on: i386
|
|
||||||
- build-on: arm64
|
- build-on: arm64
|
||||||
|
- build-on: armhf
|
||||||
|
- build-on: i386
|
||||||
- build-on: ppc64el
|
- build-on: ppc64el
|
||||||
- build-on: s390x
|
- build-on: s390x
|
||||||
|
|
||||||
|
76
src/args.rs
76
src/args.rs
@ -1,76 +0,0 @@
|
|||||||
use clap::{App, Arg};
|
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
pub struct AppArgs {
|
|
||||||
pub target: String,
|
|
||||||
pub no_css: bool,
|
|
||||||
pub no_fonts: bool,
|
|
||||||
pub no_frames: bool,
|
|
||||||
pub no_images: bool,
|
|
||||||
pub no_js: bool,
|
|
||||||
pub insecure: bool,
|
|
||||||
pub isolate: bool,
|
|
||||||
pub output: String,
|
|
||||||
pub silent: bool,
|
|
||||||
pub timeout: u64,
|
|
||||||
pub user_agent: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
|
|
||||||
const DEFAULT_USER_AGENT: &str =
|
|
||||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
|
|
||||||
|
|
||||||
impl AppArgs {
|
|
||||||
pub fn get() -> AppArgs {
|
|
||||||
let app = App::new(env!("CARGO_PKG_NAME"))
|
|
||||||
.version(crate_version!())
|
|
||||||
.author(crate_authors!("\n"))
|
|
||||||
.about(crate_description!())
|
|
||||||
.arg(
|
|
||||||
Arg::with_name("target")
|
|
||||||
.required(true)
|
|
||||||
.takes_value(true)
|
|
||||||
.index(1)
|
|
||||||
.help("URL or file path"),
|
|
||||||
)
|
|
||||||
// .args_from_usage("-a, --include-audio 'Removes audio sources'")
|
|
||||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
|
||||||
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
|
||||||
.args_from_usage("-F, --no-fonts 'Removes fonts'")
|
|
||||||
.args_from_usage("-i, --no-images 'Removes images'")
|
|
||||||
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
|
||||||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
|
||||||
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
|
||||||
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
|
|
||||||
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
|
||||||
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
|
|
||||||
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
|
|
||||||
// .args_from_usage("-v, --include-video 'Removes video sources'")
|
|
||||||
.get_matches();
|
|
||||||
let mut app_args = AppArgs::default();
|
|
||||||
// Process the command
|
|
||||||
app_args.target = app
|
|
||||||
.value_of("target")
|
|
||||||
.expect("please set target")
|
|
||||||
.to_string();
|
|
||||||
app_args.no_css = app.is_present("no-css");
|
|
||||||
app_args.no_fonts = app.is_present("no-fonts");
|
|
||||||
app_args.no_frames = app.is_present("no-frames");
|
|
||||||
app_args.no_images = app.is_present("no-images");
|
|
||||||
app_args.no_js = app.is_present("no-js");
|
|
||||||
app_args.insecure = app.is_present("insecure");
|
|
||||||
app_args.isolate = app.is_present("isolate");
|
|
||||||
app_args.silent = app.is_present("silent");
|
|
||||||
app_args.timeout = app
|
|
||||||
.value_of("timeout")
|
|
||||||
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
|
|
||||||
.parse::<u64>()
|
|
||||||
.unwrap();
|
|
||||||
app_args.output = app.value_of("output").unwrap_or("").to_string();
|
|
||||||
app_args.user_agent = app
|
|
||||||
.value_of("user-agent")
|
|
||||||
.unwrap_or(DEFAULT_USER_AGENT)
|
|
||||||
.to_string();
|
|
||||||
app_args
|
|
||||||
}
|
|
||||||
}
|
|
349
src/css.rs
349
src/css.rs
@ -1,8 +1,13 @@
|
|||||||
use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
|
use cssparser::{
|
||||||
|
serialize_identifier, serialize_string, ParseError, Parser, ParserInput, SourcePosition, Token,
|
||||||
|
};
|
||||||
use reqwest::blocking::Client;
|
use reqwest::blocking::Client;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
use crate::utils::{data_to_data_url, get_url_fragment, resolve_url, retrieve_asset};
|
use crate::opts::Options;
|
||||||
|
use crate::url::{create_data_url, resolve_url, EMPTY_IMAGE_DATA_URL};
|
||||||
|
use crate::utils::retrieve_asset;
|
||||||
|
|
||||||
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
|
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
|
||||||
// Universal
|
// Universal
|
||||||
@ -24,7 +29,44 @@ const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
|
|||||||
"suffix",
|
"suffix",
|
||||||
"symbols",
|
"symbols",
|
||||||
];
|
];
|
||||||
const CSS_SPECIAL_CHARS: &str = "~!@$%^&*()+=,./'\";:?><[]{}|`#";
|
|
||||||
|
pub fn embed_css(
|
||||||
|
cache: &mut HashMap<String, Vec<u8>>,
|
||||||
|
client: &Client,
|
||||||
|
document_url: &Url,
|
||||||
|
css: &str,
|
||||||
|
options: &Options,
|
||||||
|
depth: u32,
|
||||||
|
) -> String {
|
||||||
|
let mut input = ParserInput::new(&css);
|
||||||
|
let mut parser = Parser::new(&mut input);
|
||||||
|
|
||||||
|
process_css(
|
||||||
|
cache,
|
||||||
|
client,
|
||||||
|
document_url,
|
||||||
|
&mut parser,
|
||||||
|
options,
|
||||||
|
depth,
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn format_ident(ident: &str) -> String {
|
||||||
|
let mut res: String = "".to_string();
|
||||||
|
let _ = serialize_identifier(ident, &mut res);
|
||||||
|
res = res.trim_end().to_string();
|
||||||
|
res
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn format_quoted_string(string: &str) -> String {
|
||||||
|
let mut res: String = "".to_string();
|
||||||
|
let _ = serialize_string(string, &mut res);
|
||||||
|
res
|
||||||
|
}
|
||||||
|
|
||||||
pub fn is_image_url_prop(prop_name: &str) -> bool {
|
pub fn is_image_url_prop(prop_name: &str) -> bool {
|
||||||
CSS_PROPS_WITH_IMAGE_URLS
|
CSS_PROPS_WITH_IMAGE_URLS
|
||||||
@ -33,42 +75,21 @@ pub fn is_image_url_prop(prop_name: &str) -> bool {
|
|||||||
.is_some()
|
.is_some()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn enquote(input: String, double: bool) -> String {
|
|
||||||
if double {
|
|
||||||
format!("\"{}\"", input.replace("\"", "\\\""))
|
|
||||||
} else {
|
|
||||||
format!("'{}'", input.replace("'", "\\'"))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn escape(value: &str) -> String {
|
|
||||||
let mut res = str!(&value);
|
|
||||||
|
|
||||||
res = res.replace("\\", "\\\\");
|
|
||||||
|
|
||||||
for c in CSS_SPECIAL_CHARS.chars() {
|
|
||||||
res = res.replace(c, format!("\\{}", c).as_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
res
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn process_css<'a>(
|
pub fn process_css<'a>(
|
||||||
cache: &mut HashMap<String, Vec<u8>>,
|
cache: &mut HashMap<String, Vec<u8>>,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
parent_url: &str,
|
document_url: &Url,
|
||||||
parser: &mut Parser,
|
parser: &mut Parser,
|
||||||
|
options: &Options,
|
||||||
|
depth: u32,
|
||||||
rule_name: &str,
|
rule_name: &str,
|
||||||
prop_name: &str,
|
prop_name: &str,
|
||||||
func_name: &str,
|
func_name: &str,
|
||||||
opt_no_fonts: bool,
|
|
||||||
opt_no_images: bool,
|
|
||||||
opt_silent: bool,
|
|
||||||
) -> Result<String, ParseError<'a, String>> {
|
) -> Result<String, ParseError<'a, String>> {
|
||||||
let mut result: String = str!();
|
let mut result: String = "".to_string();
|
||||||
|
|
||||||
let mut curr_rule: String = str!(rule_name.clone());
|
let mut curr_rule: String = rule_name.clone().to_string();
|
||||||
let mut curr_prop: String = str!(prop_name.clone());
|
let mut curr_prop: String = prop_name.clone().to_string();
|
||||||
let mut token: &Token;
|
let mut token: &Token;
|
||||||
let mut token_offset: SourcePosition;
|
let mut token_offset: SourcePosition;
|
||||||
|
|
||||||
@ -84,13 +105,13 @@ pub fn process_css<'a>(
|
|||||||
match *token {
|
match *token {
|
||||||
Token::Comment(_) => {
|
Token::Comment(_) => {
|
||||||
let token_slice = parser.slice_from(token_offset);
|
let token_slice = parser.slice_from(token_offset);
|
||||||
result.push_str(str!(token_slice).as_str());
|
result.push_str(token_slice);
|
||||||
}
|
}
|
||||||
Token::Semicolon => result.push_str(";"),
|
Token::Semicolon => result.push_str(";"),
|
||||||
Token::Colon => result.push_str(":"),
|
Token::Colon => result.push_str(":"),
|
||||||
Token::Comma => result.push_str(","),
|
Token::Comma => result.push_str(","),
|
||||||
Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
|
Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
|
||||||
if opt_no_fonts && curr_rule == "font-face" {
|
if options.no_fonts && curr_rule == "font-face" {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -111,14 +132,13 @@ pub fn process_css<'a>(
|
|||||||
process_css(
|
process_css(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
parent_url,
|
document_url,
|
||||||
parser,
|
parser,
|
||||||
|
options,
|
||||||
|
depth,
|
||||||
rule_name,
|
rule_name,
|
||||||
curr_prop.as_str(),
|
curr_prop.as_str(),
|
||||||
func_name,
|
func_name,
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -141,14 +161,14 @@ pub fn process_css<'a>(
|
|||||||
}
|
}
|
||||||
// div...
|
// div...
|
||||||
Token::Ident(ref value) => {
|
Token::Ident(ref value) => {
|
||||||
curr_rule = str!();
|
curr_rule = "".to_string();
|
||||||
curr_prop = str!(value);
|
curr_prop = value.to_string();
|
||||||
result.push_str(&escape(value));
|
result.push_str(&format_ident(value));
|
||||||
}
|
}
|
||||||
// @import, @font-face, @charset, @media...
|
// @import, @font-face, @charset, @media...
|
||||||
Token::AtKeyword(ref value) => {
|
Token::AtKeyword(ref value) => {
|
||||||
curr_rule = str!(value);
|
curr_rule = value.to_string();
|
||||||
if opt_no_fonts && curr_rule == "font-face" {
|
if options.no_fonts && curr_rule == "font-face" {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
result.push_str("@");
|
result.push_str("@");
|
||||||
@ -161,73 +181,101 @@ pub fn process_css<'a>(
|
|||||||
Token::QuotedString(ref value) => {
|
Token::QuotedString(ref value) => {
|
||||||
if curr_rule == "import" {
|
if curr_rule == "import" {
|
||||||
// Reset current at-rule value
|
// Reset current at-rule value
|
||||||
curr_rule = str!();
|
curr_rule = "".to_string();
|
||||||
|
|
||||||
// Skip empty import values
|
// Skip empty import values
|
||||||
if value.len() < 1 {
|
if value.len() == 0 {
|
||||||
result.push_str("''");
|
result.push_str("''");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
let import_full_url: Url = resolve_url(&document_url, value);
|
||||||
let url_fragment = get_url_fragment(full_url.clone());
|
match retrieve_asset(
|
||||||
let (css, final_url) = retrieve_asset(
|
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
&parent_url,
|
&document_url,
|
||||||
&full_url,
|
&import_full_url,
|
||||||
false,
|
options,
|
||||||
"",
|
depth + 1,
|
||||||
opt_silent,
|
) {
|
||||||
)
|
Ok((
|
||||||
.unwrap_or_default();
|
import_contents,
|
||||||
|
import_final_url,
|
||||||
result.push_str(
|
import_media_type,
|
||||||
enquote(
|
import_charset,
|
||||||
data_to_data_url(
|
)) => {
|
||||||
"text/css",
|
let mut import_data_url = create_data_url(
|
||||||
|
&import_media_type,
|
||||||
|
&import_charset,
|
||||||
embed_css(
|
embed_css(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
final_url.as_str(),
|
&import_final_url,
|
||||||
&css,
|
&String::from_utf8_lossy(&import_contents),
|
||||||
opt_no_fonts,
|
options,
|
||||||
opt_no_images,
|
depth + 1,
|
||||||
opt_silent,
|
|
||||||
)
|
)
|
||||||
.as_bytes(),
|
.as_bytes(),
|
||||||
&final_url,
|
&import_final_url,
|
||||||
url_fragment.as_str(),
|
|
||||||
),
|
|
||||||
false,
|
|
||||||
)
|
|
||||||
.as_str(),
|
|
||||||
);
|
);
|
||||||
|
import_data_url.set_fragment(import_full_url.fragment());
|
||||||
|
result.push_str(
|
||||||
|
format_quoted_string(&import_data_url.to_string()).as_str(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
// Keep remote reference if unable to retrieve the asset
|
||||||
|
if import_full_url.scheme() == "http"
|
||||||
|
|| import_full_url.scheme() == "https"
|
||||||
|
{
|
||||||
|
result.push_str(
|
||||||
|
format_quoted_string(&import_full_url.to_string()).as_str(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if func_name == "url" {
|
if func_name == "url" {
|
||||||
// Skip empty url()'s
|
// Skip empty url()'s
|
||||||
if value.len() < 1 {
|
if value.len() == 0 {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
|
if options.no_images && is_image_url_prop(curr_prop.as_str()) {
|
||||||
result.push_str(enquote(str!(empty_image!()), false).as_str());
|
result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
|
||||||
} else {
|
} else {
|
||||||
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
|
let resolved_url: Url = resolve_url(&document_url, value);
|
||||||
let (data_url, _final_url) = retrieve_asset(
|
match retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
&parent_url,
|
&document_url,
|
||||||
&resolved_url,
|
&resolved_url,
|
||||||
true,
|
options,
|
||||||
"",
|
depth + 1,
|
||||||
opt_silent,
|
) {
|
||||||
)
|
Ok((data, final_url, media_type, charset)) => {
|
||||||
.unwrap_or_default();
|
let mut data_url =
|
||||||
result.push_str(enquote(data_url, false).as_str());
|
create_data_url(&media_type, &charset, &data, &final_url);
|
||||||
|
data_url.set_fragment(resolved_url.fragment());
|
||||||
|
result.push_str(
|
||||||
|
format_quoted_string(&data_url.to_string()).as_str(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
// Keep remote reference if unable to retrieve the asset
|
||||||
|
if resolved_url.scheme() == "http"
|
||||||
|
|| resolved_url.scheme() == "https"
|
||||||
|
{
|
||||||
|
result.push_str(
|
||||||
|
format_quoted_string(&resolved_url.to_string())
|
||||||
|
.as_str(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
result.push_str(enquote(str!(value), false).as_str());
|
result.push_str(format_quoted_string(value).as_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -249,7 +297,7 @@ pub fn process_css<'a>(
|
|||||||
if *has_sign && *unit_value >= 0. {
|
if *has_sign && *unit_value >= 0. {
|
||||||
result.push_str("+");
|
result.push_str("+");
|
||||||
}
|
}
|
||||||
result.push_str(str!(unit_value * 100.).as_str());
|
result.push_str(&(unit_value * 100.0).to_string());
|
||||||
result.push_str("%");
|
result.push_str("%");
|
||||||
}
|
}
|
||||||
Token::Dimension {
|
Token::Dimension {
|
||||||
@ -261,21 +309,22 @@ pub fn process_css<'a>(
|
|||||||
if *has_sign && *value >= 0. {
|
if *has_sign && *value >= 0. {
|
||||||
result.push_str("+");
|
result.push_str("+");
|
||||||
}
|
}
|
||||||
result.push_str(str!(value).as_str());
|
result.push_str(&value.to_string());
|
||||||
result.push_str(str!(unit).as_str());
|
result.push_str(&unit.to_string());
|
||||||
}
|
}
|
||||||
// #selector, #id...
|
// #selector, #id...
|
||||||
Token::IDHash(ref value) => {
|
Token::IDHash(ref value) => {
|
||||||
curr_rule = str!();
|
curr_rule = "".to_string();
|
||||||
result.push_str("#");
|
result.push_str("#");
|
||||||
result.push_str(&escape(value));
|
result.push_str(&format_ident(value));
|
||||||
}
|
}
|
||||||
|
// url()
|
||||||
Token::UnquotedUrl(ref value) => {
|
Token::UnquotedUrl(ref value) => {
|
||||||
let is_import: bool = curr_rule == "import";
|
let is_import: bool = curr_rule == "import";
|
||||||
|
|
||||||
if is_import {
|
if is_import {
|
||||||
// Reset current at-rule value
|
// Reset current at-rule value
|
||||||
curr_rule = str!();
|
curr_rule = "".to_string();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip empty url()'s
|
// Skip empty url()'s
|
||||||
@ -291,60 +340,75 @@ pub fn process_css<'a>(
|
|||||||
|
|
||||||
result.push_str("url(");
|
result.push_str("url(");
|
||||||
if is_import {
|
if is_import {
|
||||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
let full_url: Url = resolve_url(&document_url, value);
|
||||||
let url_fragment = get_url_fragment(full_url.clone());
|
match retrieve_asset(
|
||||||
let (css, final_url) = retrieve_asset(
|
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
&parent_url,
|
&document_url,
|
||||||
&full_url,
|
&full_url,
|
||||||
false,
|
options,
|
||||||
"",
|
depth + 1,
|
||||||
opt_silent,
|
) {
|
||||||
)
|
Ok((css, final_url, media_type, charset)) => {
|
||||||
.unwrap_or_default();
|
let mut data_url = create_data_url(
|
||||||
|
&media_type,
|
||||||
result.push_str(
|
&charset,
|
||||||
enquote(
|
|
||||||
data_to_data_url(
|
|
||||||
"text/css",
|
|
||||||
embed_css(
|
embed_css(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
final_url.as_str(),
|
&final_url,
|
||||||
&css,
|
&String::from_utf8_lossy(&css),
|
||||||
opt_no_fonts,
|
options,
|
||||||
opt_no_images,
|
depth + 1,
|
||||||
opt_silent,
|
|
||||||
)
|
)
|
||||||
.as_bytes(),
|
.as_bytes(),
|
||||||
&final_url,
|
&final_url,
|
||||||
url_fragment.as_str(),
|
|
||||||
),
|
|
||||||
false,
|
|
||||||
)
|
|
||||||
.as_str(),
|
|
||||||
);
|
);
|
||||||
|
data_url.set_fragment(full_url.fragment());
|
||||||
|
result.push_str(format_quoted_string(&data_url.to_string()).as_str());
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
// Keep remote reference if unable to retrieve the asset
|
||||||
|
if full_url.scheme() == "http" || full_url.scheme() == "https" {
|
||||||
|
result
|
||||||
|
.push_str(format_quoted_string(&full_url.to_string()).as_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
|
if is_image_url_prop(curr_prop.as_str()) && options.no_images {
|
||||||
result.push_str(enquote(str!(empty_image!()), false).as_str());
|
result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
|
||||||
} else {
|
} else {
|
||||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
let full_url: Url = resolve_url(&document_url, value);
|
||||||
let (data_url, _final_url) = retrieve_asset(
|
match retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
&parent_url,
|
&document_url,
|
||||||
&full_url,
|
&full_url,
|
||||||
true,
|
options,
|
||||||
"",
|
depth + 1,
|
||||||
opt_silent,
|
) {
|
||||||
)
|
Ok((data, final_url, media_type, charset)) => {
|
||||||
.unwrap_or_default();
|
let mut data_url =
|
||||||
result.push_str(enquote(data_url, false).as_str());
|
create_data_url(&media_type, &charset, &data, &final_url);
|
||||||
|
data_url.set_fragment(full_url.fragment());
|
||||||
|
result
|
||||||
|
.push_str(format_quoted_string(&data_url.to_string()).as_str());
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
// Keep remote reference if unable to retrieve the asset
|
||||||
|
if full_url.scheme() == "http" || full_url.scheme() == "https" {
|
||||||
|
result.push_str(
|
||||||
|
format_quoted_string(&full_url.to_string()).as_str(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result.push_str(")");
|
result.push_str(")");
|
||||||
}
|
}
|
||||||
|
// =
|
||||||
Token::Delim(ref value) => result.push_str(&value.to_string()),
|
Token::Delim(ref value) => result.push_str(&value.to_string()),
|
||||||
Token::Function(ref name) => {
|
Token::Function(ref name) => {
|
||||||
let function_name: &str = &name.clone();
|
let function_name: &str = &name.clone();
|
||||||
@ -356,14 +420,13 @@ pub fn process_css<'a>(
|
|||||||
process_css(
|
process_css(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
parent_url,
|
document_url,
|
||||||
parser,
|
parser,
|
||||||
|
options,
|
||||||
|
depth,
|
||||||
curr_rule.as_str(),
|
curr_rule.as_str(),
|
||||||
curr_prop.as_str(),
|
curr_prop.as_str(),
|
||||||
function_name,
|
function_name,
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -375,32 +438,10 @@ pub fn process_css<'a>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ensure empty CSS is really empty
|
||||||
|
if result.len() > 0 && result.trim().len() == 0 {
|
||||||
|
result = result.trim().to_string()
|
||||||
|
}
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embed_css(
|
|
||||||
cache: &mut HashMap<String, Vec<u8>>,
|
|
||||||
client: &Client,
|
|
||||||
parent_url: &str,
|
|
||||||
css: &str,
|
|
||||||
opt_no_fonts: bool,
|
|
||||||
opt_no_images: bool,
|
|
||||||
opt_silent: bool,
|
|
||||||
) -> String {
|
|
||||||
let mut input = ParserInput::new(&css);
|
|
||||||
let mut parser = Parser::new(&mut input);
|
|
||||||
|
|
||||||
process_css(
|
|
||||||
cache,
|
|
||||||
client,
|
|
||||||
parent_url,
|
|
||||||
&mut parser,
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
)
|
|
||||||
.unwrap()
|
|
||||||
}
|
|
||||||
|
1668
src/html.rs
1668
src/html.rs
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
|||||||
const JS_DOM_EVENT_ATTRS: &[&str] = &[
|
const JS_DOM_EVENT_ATTRS: &'static [&str] = &[
|
||||||
// From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects":
|
// From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects":
|
||||||
// https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
|
// https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
|
||||||
// https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes")
|
// https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes")
|
||||||
|
@ -1,10 +1,6 @@
|
|||||||
#[macro_use]
|
|
||||||
mod macros;
|
|
||||||
|
|
||||||
pub mod css;
|
pub mod css;
|
||||||
pub mod html;
|
pub mod html;
|
||||||
pub mod js;
|
pub mod js;
|
||||||
|
pub mod opts;
|
||||||
|
pub mod url;
|
||||||
pub mod utils;
|
pub mod utils;
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
pub mod tests;
|
|
||||||
|
@ -1,17 +0,0 @@
|
|||||||
/// Shorthand for building a `String`.
///
/// * `str!()` expands to an empty `String`.
/// * `str!(expr)` expands to `ToString::to_string(&expr)`.
#[macro_export]
macro_rules! str {
    () => { String::new() };
    ($value:expr) => { ToString::to_string(&$value) };
}
|
|
||||||
|
|
||||||
/// Expands to a string literal holding a base64-encoded PNG data URL.
///
/// NOTE(review): the name suggests it is a blank placeholder image; the pixel
/// contents are not verifiable from here.
#[macro_export]
macro_rules! empty_image {
    () => {
        concat!(
            "data:image/png;base64,",
            "iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII="
        )
    };
}
|
|
364
src/main.rs
364
src/main.rs
@ -1,21 +1,22 @@
|
|||||||
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
use encoding_rs::Encoding;
|
||||||
use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset};
|
use html5ever::rcdom::RcDom;
|
||||||
use reqwest::blocking::Client;
|
use reqwest::blocking::Client;
|
||||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::env;
|
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::io::{self, Error, Write};
|
use std::io::{self, prelude::*, Error, Write};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::process;
|
use std::process;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
mod args;
|
use monolith::html::{
|
||||||
mod macros;
|
add_favicon, create_metadata_tag, get_base_url, get_charset, has_favicon, html_to_dom,
|
||||||
|
serialize_document, set_base_url, set_charset, walk_and_embed_assets,
|
||||||
#[macro_use]
|
};
|
||||||
extern crate clap;
|
use monolith::opts::Options;
|
||||||
use crate::args::AppArgs;
|
use monolith::url::{create_data_url, resolve_url};
|
||||||
|
use monolith::utils::retrieve_asset;
|
||||||
|
|
||||||
enum Output {
|
enum Output {
|
||||||
Stdout(io::Stdout),
|
Stdout(io::Stdout),
|
||||||
@ -24,138 +25,305 @@ enum Output {
|
|||||||
|
|
||||||
impl Output {
|
impl Output {
|
||||||
fn new(file_path: &str) -> Result<Output, Error> {
|
fn new(file_path: &str) -> Result<Output, Error> {
|
||||||
if file_path.is_empty() {
|
if file_path.is_empty() || file_path.eq("-") {
|
||||||
Ok(Output::Stdout(io::stdout()))
|
Ok(Output::Stdout(io::stdout()))
|
||||||
} else {
|
} else {
|
||||||
Ok(Output::File(fs::File::create(file_path)?))
|
Ok(Output::File(fs::File::create(file_path)?))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn writeln_str(&mut self, s: &str) -> Result<(), Error> {
|
fn write(&mut self, bytes: &Vec<u8>) -> Result<(), Error> {
|
||||||
match self {
|
match self {
|
||||||
Output::Stdout(stdout) => {
|
Output::Stdout(stdout) => {
|
||||||
writeln!(stdout, "{}", s)?;
|
stdout.write_all(bytes)?;
|
||||||
|
// Ensure newline at end of output
|
||||||
|
if bytes.last() != Some(&b"\n"[0]) {
|
||||||
|
stdout.write(b"\n")?;
|
||||||
|
}
|
||||||
stdout.flush()
|
stdout.flush()
|
||||||
}
|
}
|
||||||
Output::File(f) => {
|
Output::File(file) => {
|
||||||
writeln!(f, "{}", s)?;
|
file.write_all(bytes)?;
|
||||||
f.flush()
|
// Ensure newline at end of output
|
||||||
|
if bytes.last() != Some(&b"\n"[0]) {
|
||||||
|
file.write(b"\n")?;
|
||||||
|
}
|
||||||
|
file.flush()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads stdin until end-of-file and returns the bytes.
///
/// Reading is best-effort: if an I/O error occurs, whatever was read before
/// the error is returned and the error itself is dropped.
pub fn read_stdin() -> Vec<u8> {
    let mut buffer: Vec<u8> = Vec::new();

    // Deliberately ignore the result: the partial buffer is returned either way
    let _ = io::stdin().lock().read_to_end(&mut buffer);

    buffer
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let app_args = AppArgs::get();
|
let options = Options::from_args();
|
||||||
let original_target: &str = &app_args.target;
|
|
||||||
let target_url: &str;
|
|
||||||
let base_url;
|
|
||||||
let dom;
|
|
||||||
|
|
||||||
// Pre-process the input
|
// Check if target was provided
|
||||||
let cwd_normalized: String =
|
if options.target.len() == 0 {
|
||||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
if !options.silent {
|
||||||
let path = Path::new(original_target);
|
|
||||||
let mut target: String = str!(original_target.clone()).replace("\\", "/");
|
|
||||||
let path_is_relative: bool = path.is_relative();
|
|
||||||
|
|
||||||
if target.clone().len() == 0 {
|
|
||||||
eprintln!("No target specified");
|
eprintln!("No target specified");
|
||||||
process::exit(1);
|
}
|
||||||
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
|
|
||||||
target_url = target.as_str();
|
|
||||||
} else if is_file_url(target.clone()) {
|
|
||||||
target_url = target.as_str();
|
|
||||||
} else if path.exists() {
|
|
||||||
if !path.is_file() {
|
|
||||||
eprintln!("Local target is not a file: {}", original_target);
|
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
|
|
||||||
if path_is_relative {
|
// Check if custom charset is valid
|
||||||
target.insert_str(if cfg!(windows) { 8 } else { 7 }, &cwd_normalized);
|
if let Some(custom_charset) = options.charset.clone() {
|
||||||
target.insert_str(
|
if !Encoding::for_label_no_replacement(custom_charset.as_bytes()).is_some() {
|
||||||
if cfg!(windows) { 8 } else { 7 } + &cwd_normalized.len(),
|
eprintln!("Unknown encoding: {}", &custom_charset);
|
||||||
"/",
|
process::exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut use_stdin: bool = false;
|
||||||
|
|
||||||
|
let target_url = match options.target.as_str() {
|
||||||
|
"-" => {
|
||||||
|
// Read from pipe (stdin)
|
||||||
|
use_stdin = true;
|
||||||
|
// Set default target URL to an empty data URL; the user can set it via --base-url
|
||||||
|
Url::parse("data:text/html,").unwrap()
|
||||||
|
}
|
||||||
|
target => match Url::parse(&target) {
|
||||||
|
Ok(url) => match url.scheme() {
|
||||||
|
"data" | "file" | "http" | "https" => url,
|
||||||
|
unsupported_scheme => {
|
||||||
|
if !options.silent {
|
||||||
|
eprintln!("Unsupported target URL type: {}", unsupported_scheme);
|
||||||
|
}
|
||||||
|
process::exit(1)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(_) => {
|
||||||
|
// Failed to parse given base URL (perhaps it's a filesystem path?)
|
||||||
|
let path: &Path = Path::new(&target);
|
||||||
|
match path.exists() {
|
||||||
|
true => match path.is_file() {
|
||||||
|
true => {
|
||||||
|
let canonical_path = fs::canonicalize(&path).unwrap();
|
||||||
|
match Url::from_file_path(canonical_path) {
|
||||||
|
Ok(url) => url,
|
||||||
|
Err(_) => {
|
||||||
|
if !options.silent {
|
||||||
|
eprintln!(
|
||||||
|
"Could not generate file URL out of given path: {}",
|
||||||
|
&target
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
target_url = target.as_str();
|
process::exit(1);
|
||||||
} else {
|
|
||||||
target.insert_str(0, "http://");
|
|
||||||
target_url = target.as_str();
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
|
}
|
||||||
|
false => {
|
||||||
|
if !options.silent {
|
||||||
|
eprintln!("Local target is not a file: {}", &target);
|
||||||
|
}
|
||||||
|
process::exit(1);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
false => {
|
||||||
|
// It is not a FS path, now we do what browsers do:
|
||||||
|
// prepend "http://" and hope it points to a website
|
||||||
|
Url::parse(&format!("http://{hopefully_url}", hopefully_url = &target))
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
// Initialize client
|
// Initialize client
|
||||||
let mut cache = HashMap::new();
|
let mut cache = HashMap::new();
|
||||||
let mut header_map = HeaderMap::new();
|
let mut header_map = HeaderMap::new();
|
||||||
|
if let Some(user_agent) = &options.user_agent {
|
||||||
header_map.insert(
|
header_map.insert(
|
||||||
USER_AGENT,
|
USER_AGENT,
|
||||||
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
|
HeaderValue::from_str(&user_agent).expect("Invalid User-Agent header specified"),
|
||||||
);
|
);
|
||||||
|
}
|
||||||
let timeout: u64 = if app_args.timeout > 0 {
|
let client = if options.timeout > 0 {
|
||||||
app_args.timeout
|
Client::builder().timeout(Duration::from_secs(options.timeout))
|
||||||
} else {
|
} else {
|
||||||
std::u64::MAX / 4
|
// No timeout is default
|
||||||
};
|
Client::builder()
|
||||||
let client = Client::builder()
|
}
|
||||||
.timeout(Duration::from_secs(timeout))
|
.danger_accept_invalid_certs(options.insecure)
|
||||||
.danger_accept_invalid_certs(app_args.insecure)
|
|
||||||
.default_headers(header_map)
|
.default_headers(header_map)
|
||||||
.build()
|
.build()
|
||||||
.expect("Failed to initialize HTTP client");
|
.expect("Failed to initialize HTTP client");
|
||||||
|
|
||||||
// Retrieve root document
|
// At first we assume that base URL is the same as target URL
|
||||||
if is_file_url(target_url) || is_http_url(target_url) {
|
let mut base_url: Url = target_url.clone();
|
||||||
let (data, final_url) = retrieve_asset(
|
|
||||||
&mut cache,
|
let data: Vec<u8>;
|
||||||
&client,
|
let mut document_encoding: String = "".to_string();
|
||||||
target_url,
|
let mut dom: RcDom;
|
||||||
target_url,
|
|
||||||
false,
|
// Retrieve target document
|
||||||
"",
|
if use_stdin {
|
||||||
app_args.silent,
|
data = read_stdin();
|
||||||
)
|
} else if target_url.scheme() == "file"
|
||||||
.expect("Could not retrieve target document");
|
|| (target_url.scheme() == "http" || target_url.scheme() == "https")
|
||||||
base_url = final_url;
|
|| target_url.scheme() == "data"
|
||||||
dom = html_to_dom(&data);
|
{
|
||||||
} else if is_data_url(target_url) {
|
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options, 0) {
|
||||||
let (media_type, text): (String, String) = data_url_to_text(target_url);
|
Ok((retrieved_data, final_url, media_type, charset)) => {
|
||||||
|
// Make sure the media type is text/html
|
||||||
if !media_type.eq_ignore_ascii_case("text/html") {
|
if !media_type.eq_ignore_ascii_case("text/html") {
|
||||||
eprintln!("Unsupported data URL media type");
|
if !options.silent {
|
||||||
|
eprintln!("Unsupported document media type");
|
||||||
|
}
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
base_url = str!(target_url);
|
|
||||||
dom = html_to_dom(&text);
|
if options
|
||||||
|
.base_url
|
||||||
|
.clone()
|
||||||
|
.unwrap_or("".to_string())
|
||||||
|
.is_empty()
|
||||||
|
{
|
||||||
|
base_url = final_url;
|
||||||
|
}
|
||||||
|
|
||||||
|
data = retrieved_data;
|
||||||
|
document_encoding = charset;
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
if !options.silent {
|
||||||
|
eprintln!("Could not retrieve target document");
|
||||||
|
}
|
||||||
|
process::exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
walk_and_embed_assets(
|
// Initial parse
|
||||||
|
dom = html_to_dom(&data, document_encoding.clone());
|
||||||
|
|
||||||
|
// TODO: investigate if charset from filesystem/data URL/HTTP headers
|
||||||
|
// has say over what's specified in HTML
|
||||||
|
|
||||||
|
// Attempt to determine document's charset
|
||||||
|
if let Some(html_charset) = get_charset(&dom.document) {
|
||||||
|
if !html_charset.is_empty() {
|
||||||
|
// Check if the charset specified inside HTML is valid
|
||||||
|
if let Some(encoding) = Encoding::for_label_no_replacement(html_charset.as_bytes()) {
|
||||||
|
document_encoding = html_charset;
|
||||||
|
dom = html_to_dom(&data, encoding.name().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use custom base URL if specified, read and use what's in the DOM otherwise
|
||||||
|
let custom_base_url: String = options.base_url.clone().unwrap_or("".to_string());
|
||||||
|
if custom_base_url.is_empty() {
|
||||||
|
// No custom base URL is specified
|
||||||
|
// Try to see if document has BASE element
|
||||||
|
if let Some(existing_base_url) = get_base_url(&dom.document) {
|
||||||
|
base_url = resolve_url(&target_url, &existing_base_url);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Custom base URL provided
|
||||||
|
match Url::parse(&custom_base_url) {
|
||||||
|
Ok(parsed_url) => {
|
||||||
|
if parsed_url.scheme() == "file" {
|
||||||
|
// File base URLs can only work with
|
||||||
|
// documents saved from filesystem
|
||||||
|
if target_url.scheme() == "file" {
|
||||||
|
base_url = parsed_url;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
base_url = parsed_url;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
// Failed to parse given base URL, perhaps it's a filesystem path?
|
||||||
|
if target_url.scheme() == "file" {
|
||||||
|
// Relative paths could work for documents saved from filesystem
|
||||||
|
let path: &Path = Path::new(&custom_base_url);
|
||||||
|
if path.exists() {
|
||||||
|
match Url::from_file_path(fs::canonicalize(&path).unwrap()) {
|
||||||
|
Ok(file_url) => {
|
||||||
|
base_url = file_url;
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
if !options.silent {
|
||||||
|
eprintln!(
|
||||||
|
"Could not map given path to base URL: {}",
|
||||||
|
custom_base_url
|
||||||
|
);
|
||||||
|
}
|
||||||
|
process::exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Traverse through the document and embed remote assets
|
||||||
|
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
// Update or add new BASE element to reroute network requests and hash-links
|
||||||
|
if let Some(new_base_url) = options.base_url.clone() {
|
||||||
|
dom = set_base_url(&dom.document, new_base_url);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Request and embed /favicon.ico (unless it's already linked in the document)
|
||||||
|
if !options.no_images
|
||||||
|
&& (target_url.scheme() == "http" || target_url.scheme() == "https")
|
||||||
|
&& !has_favicon(&dom.document)
|
||||||
|
{
|
||||||
|
let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico");
|
||||||
|
|
||||||
|
match retrieve_asset(
|
||||||
&mut cache,
|
&mut cache,
|
||||||
&client,
|
&client,
|
||||||
&base_url,
|
&target_url,
|
||||||
&dom.document,
|
&favicon_ico_url,
|
||||||
app_args.no_css,
|
&options,
|
||||||
app_args.no_fonts,
|
0,
|
||||||
app_args.no_frames,
|
) {
|
||||||
app_args.no_js,
|
Ok((data, final_url, media_type, charset)) => {
|
||||||
app_args.no_images,
|
let favicon_data_url: Url =
|
||||||
app_args.silent,
|
create_data_url(&media_type, &charset, &data, &final_url);
|
||||||
);
|
dom = add_favicon(&dom.document, favicon_data_url.to_string());
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
// Failed to retrieve /favicon.ico
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let html: String = stringify_document(
|
// Save using specified charset, if given
|
||||||
&dom.document,
|
if let Some(custom_charset) = options.charset.clone() {
|
||||||
app_args.no_css,
|
document_encoding = custom_charset;
|
||||||
app_args.no_frames,
|
dom = set_charset(dom, document_encoding.clone());
|
||||||
app_args.no_js,
|
}
|
||||||
app_args.no_images,
|
|
||||||
app_args.isolate,
|
|
||||||
);
|
|
||||||
|
|
||||||
output
|
// Serialize DOM tree
|
||||||
.writeln_str(&html)
|
let mut result: Vec<u8> = serialize_document(dom, document_encoding, &options);
|
||||||
.expect("Could not write HTML output");
|
|
||||||
|
// Prepend metadata comment tag
|
||||||
|
if !options.no_metadata {
|
||||||
|
let mut metadata_comment: String = create_metadata_tag(&target_url);
|
||||||
|
metadata_comment += "\n";
|
||||||
|
result.splice(0..0, metadata_comment.as_bytes().to_vec());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Define output
|
||||||
|
let mut output = Output::new(&options.output).expect("Could not prepare output");
|
||||||
|
|
||||||
|
// Write result into stdout or file
|
||||||
|
output.write(&result).expect("Could not write HTML output");
|
||||||
}
|
}
|
||||||
|
144
src/opts.rs
Normal file
144
src/opts.rs
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
use clap::{App, Arg, ArgAction};
|
||||||
|
use std::env;
|
||||||
|
|
||||||
|
/// Command-line options of the program.
///
/// `Default` yields all flags off, all optional values `None`, empty strings
/// and a timeout of `0`.
#[derive(Debug, Clone, Default)]
pub struct Options {
    /// `-a, --no-audio`: removes audio sources.
    pub no_audio: bool,
    /// `-b, --base-url`: custom base URL.
    pub base_url: Option<String>,
    /// `-c, --no-css`: removes CSS.
    pub no_css: bool,
    /// `-C, --charset`: enforces custom encoding.
    pub charset: Option<String>,
    /// `-d, --domains`: whitelist of domains (a blacklist when
    /// `exclude_domains` is set).
    pub domains: Option<Vec<String>>,
    /// `-e, --ignore-errors`: ignore network errors.
    pub ignore_errors: bool,
    /// `-E, --exclude-domains`: treat specified domains as blacklist.
    pub exclude_domains: bool,
    /// `-f, --no-frames`: removes frames and iframes.
    pub no_frames: bool,
    /// `-F, --no-fonts`: removes fonts.
    pub no_fonts: bool,
    /// `-i, --no-images`: removes images.
    pub no_images: bool,
    /// `-I, --isolate`: cuts off document from the Internet.
    pub isolate: bool,
    /// NOTE(review): presumably removes JavaScript — confirm against the
    /// argument definition.
    pub no_js: bool,
    /// Accept invalid TLS certificates when retrieving over the network.
    pub insecure: bool,
    /// Do not prepend the metadata comment to the output.
    pub no_metadata: bool,
    /// Output file path; empty or `-` means stdout.
    pub output: String,
    /// Suppress error messages printed to stderr.
    pub silent: bool,
    /// Network timeout in seconds; `0` means no timeout.
    pub timeout: u64,
    /// Custom `User-Agent` header value, if any.
    pub user_agent: Option<String>,
    /// NOTE(review): presumably removes video sources — confirm against the
    /// argument definition.
    pub no_video: bool,
    /// Target to save: a URL, a filesystem path, or `-` for stdin.
    pub target: String,
    /// NOTE(review): presumably disables colored terminal output — confirm.
    pub no_color: bool,
    /// NOTE(review): presumably replaces NOSCRIPT elements with their
    /// contents — confirm.
    pub unwrap_noscript: bool,
}
|
||||||
|
|
||||||
|
// ASCII-art banner; `Options::from_args` places it ahead of the package description in the `about` text.
// NOTE(review): this extract appears to have collapsed the banner's column spacing — confirm against the real file before editing the literal.
const ASCII: &'static str = " \
|
||||||
|
_____ ______________ __________ ___________________ ___
|
||||||
|
| \\ / \\ | | | | | |
|
||||||
|
| \\_/ __ \\_| __ | | ___ ___ |__| |
|
||||||
|
| | | | | | | | | | | |
|
||||||
|
| |\\ /| |__| _ |__| |____| | | | | __ |
|
||||||
|
| | \\___/ | | \\ | | | | | | |
|
||||||
|
|___| |__________| \\_____________________| |___| |___| |___|
|
||||||
|
";
|
||||||
|
/// Timeout stored in `Options::timeout` when `--timeout` is not supplied.
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
/// User-Agent stored in `Options::user_agent` when `--user-agent` is not supplied.
const DEFAULT_USER_AGENT: &str =
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
/// Environment variable whose mere presence sets `Options::no_color`.
const ENV_VAR_NO_COLOR: &str = "NO_COLOR";
/// Environment variable checked for the value `dumb`, which also sets `Options::no_color`.
const ENV_VAR_TERM: &str = "TERM";
|
||||||
|
|
||||||
|
impl Options {
|
||||||
|
pub fn from_args() -> Options {
|
||||||
|
let app = App::new(env!("CARGO_PKG_NAME"))
|
||||||
|
.version(env!("CARGO_PKG_VERSION"))
|
||||||
|
.author(format!("\n{}\n\n", env!("CARGO_PKG_AUTHORS").replace(':', "\n")).as_str())
|
||||||
|
.about(format!("{}\n{}", ASCII, env!("CARGO_PKG_DESCRIPTION")).as_str())
|
||||||
|
.args_from_usage("-a, --no-audio 'Removes audio sources'")
|
||||||
|
.args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
|
||||||
|
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||||
|
.args_from_usage("-C, --charset=[UTF-8] 'Enforces custom encoding'")
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("domains")
|
||||||
|
.short('d')
|
||||||
|
.long("domains")
|
||||||
|
.takes_value(true)
|
||||||
|
.value_name("DOMAINS")
|
||||||
|
.action(ArgAction::Append)
|
||||||
|
.help("Whitelist of domains"),
|
||||||
|
)
|
||||||
|
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
|
||||||
|
.args_from_usage("-E, --exclude-domains 'Treat specified domains as blacklist'")
|
||||||
|
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||||
|
.args_from_usage("-F, --no-fonts 'Removes fonts'")
|
||||||
|
.args_from_usage("-i, --no-images 'Removes images'")
|
||||||
|
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
||||||
|
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||||
|
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
||||||
|
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
|
||||||
|
.args_from_usage(
|
||||||
|
"-n, --unwrap-noscript 'Replaces NOSCRIPT elements with their contents'",
|
||||||
|
)
|
||||||
|
.args_from_usage(
|
||||||
|
"-o, --output=[document.html] 'Writes output to <file>, use - for STDOUT'",
|
||||||
|
)
|
||||||
|
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
||||||
|
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
|
||||||
|
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
|
||||||
|
.args_from_usage("-v, --no-video 'Removes video sources'")
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("target")
|
||||||
|
.required(true)
|
||||||
|
.takes_value(true)
|
||||||
|
.index(1)
|
||||||
|
.help("URL or file path, use - for STDIN"),
|
||||||
|
)
|
||||||
|
.get_matches();
|
||||||
|
let mut options: Options = Options::default();
|
||||||
|
|
||||||
|
// Process the command
|
||||||
|
options.target = app
|
||||||
|
.value_of("target")
|
||||||
|
.expect("please set target")
|
||||||
|
.to_string();
|
||||||
|
options.no_audio = app.is_present("no-audio");
|
||||||
|
if let Some(base_url) = app.value_of("base-url") {
|
||||||
|
options.base_url = Some(base_url.to_string());
|
||||||
|
}
|
||||||
|
options.no_css = app.is_present("no-css");
|
||||||
|
if let Some(charset) = app.value_of("charset") {
|
||||||
|
options.charset = Some(charset.to_string());
|
||||||
|
}
|
||||||
|
if let Some(domains) = app.get_many::<String>("domains") {
|
||||||
|
let list_of_domains: Vec<String> = domains.map(|v| v.clone()).collect::<Vec<_>>();
|
||||||
|
options.domains = Some(list_of_domains);
|
||||||
|
}
|
||||||
|
options.ignore_errors = app.is_present("ignore-errors");
|
||||||
|
options.exclude_domains = app.is_present("exclude-domains");
|
||||||
|
options.no_frames = app.is_present("no-frames");
|
||||||
|
options.no_fonts = app.is_present("no-fonts");
|
||||||
|
options.no_images = app.is_present("no-images");
|
||||||
|
options.isolate = app.is_present("isolate");
|
||||||
|
options.no_js = app.is_present("no-js");
|
||||||
|
options.insecure = app.is_present("insecure");
|
||||||
|
options.no_metadata = app.is_present("no-metadata");
|
||||||
|
options.output = app.value_of("output").unwrap_or("").to_string();
|
||||||
|
options.silent = app.is_present("silent");
|
||||||
|
options.timeout = app
|
||||||
|
.value_of("timeout")
|
||||||
|
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
|
||||||
|
.parse::<u64>()
|
||||||
|
.unwrap();
|
||||||
|
if let Some(user_agent) = app.value_of("user-agent") {
|
||||||
|
options.user_agent = Some(user_agent.to_string());
|
||||||
|
} else {
|
||||||
|
options.user_agent = Some(DEFAULT_USER_AGENT.to_string());
|
||||||
|
}
|
||||||
|
options.unwrap_noscript = app.is_present("unwrap-noscript");
|
||||||
|
options.no_video = app.is_present("no-video");
|
||||||
|
|
||||||
|
options.no_color =
|
||||||
|
env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr);
|
||||||
|
if let Some(term) = env::var_os(ENV_VAR_TERM) {
|
||||||
|
if term == "dumb" {
|
||||||
|
options.no_color = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
options
|
||||||
|
}
|
||||||
|
}
|
520
src/tests/cli.rs
520
src/tests/cli.rs
@ -1,520 +0,0 @@
|
|||||||
use assert_cmd::prelude::*;
|
|
||||||
use std::env;
|
|
||||||
use std::io::Write;
|
|
||||||
use std::process::Command;
|
|
||||||
use tempfile::NamedTempFile;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
#[test]
fn passing_print_version() -> Result<(), Box<dyn std::error::Error>> {
    // Ask the binary for its version and capture everything it prints
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let run = monolith.arg("-V").output().unwrap();

    // STDOUT must be exactly "<package name> <package version>" plus a newline
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    let expected_stdout = format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"));
    assert_eq!(stdout, expected_stdout);

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process must exit with code 0
    run.assert().code(0);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
    // Pass an empty string as the target
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let run = monolith.arg("").output().unwrap();

    // Nothing may be written to STDOUT
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    assert_eq!(stdout, "");

    // STDERR must explain what went wrong
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    assert_eq!(stderr, "No target specified\n");

    // The process must exit with code 1
    run.assert().code(1);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
    // A data URL without a media type is not an acceptable target
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let run = monolith.arg("data:,Hello%2C%20World!").output().unwrap();

    // Nothing may be written to STDOUT
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    assert_eq!(stdout, "");

    // STDERR must explain what went wrong
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    assert_eq!(stderr, "Unsupported data URL media type\n");

    // The process must exit with code 1
    run.assert().code(1);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
    // Save an HTML data URL with the isolate flag switched on
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let run = monolith
        .args(["-I", "data:text/html,Hello%2C%20World!"])
        .output()
        .unwrap();

    // STDOUT must carry the document with the isolating CSP meta tag injected
    let expected_stdout = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
        </head><body>Hello, World!</body></html>\n";
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process must exit with code 0
    run.assert().code(0);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    // Save a styled HTML data URL with CSS removal switched on
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let run = monolith
        .args([
            "-c",
            "data:text/html,<style>body{background-color:pink}</style>Hello",
        ])
        .output()
        .unwrap();

    // STDOUT must carry the document with its style emptied out
    let expected_stdout = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
        <style></style>\
        </head><body>Hello</body></html>\n";
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process must exit with code 0
    run.assert().code(0);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    // Save an HTML data URL containing an iframe with frame removal switched on
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let run = monolith
        .args([
            "-f",
            "data:text/html,<iframe src=\"https://google.com\"></iframe>Hi",
        ])
        .output()
        .unwrap();

    // STDOUT must carry the document with the iframe source blanked
    let expected_stdout = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
        </head><body><iframe src=\"\"></iframe>Hi</body></html>\n";
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process must exit with code 0
    run.assert().code(0);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    // Save an HTML data URL containing an image with image removal switched on
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let run = monolith
        .args(["-i", "data:text/html,<img src=\"https://google.com\"/>Hi"])
        .output()
        .unwrap();

    // STDOUT must carry the document with the image swapped for the empty placeholder
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    let expected_stdout = format!(
        "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\
        </head>\
        <body>\
        <img src=\"{empty_image}\">\
        Hi\
        </body>\
        </html>\n",
        empty_image = empty_image!()
    );
    assert_eq!(stdout, expected_stdout);

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process must exit with code 0
    run.assert().code(0);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    // Save an HTML data URL containing a script with JS removal switched on
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let run = monolith
        .args(["-j", "data:text/html,<script>alert(2)</script>Hi"])
        .output()
        .unwrap();

    // STDOUT must carry the document with the script emptied out
    let expected_stdout = "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
        <script></script></head>\
        <body>Hi</body>\
        </html>\n";
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process must exit with code 0
    run.assert().code(0);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    // Working directory with forward slashes, as it shows up in file URLs
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");

    // Relative path to the fixture, using the platform's separator
    let target_path = if cfg!(windows) {
        "src\\tests\\data\\local-file.html"
    } else {
        "src/tests/data/local-file.html"
    };
    let run = monolith.arg(target_path).output().unwrap();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDOUT must carry the fixture with its local assets embedded
    let expected_stdout = "\
        <!DOCTYPE html><html lang=\"en\"><head>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n \
        <link href=\"data:text/css;base64,\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\" src=\"\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
        </body></html>\n\
        ";
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // STDERR must list every file URL that was retrieved, in order
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    let expected_stderr = format!(
        "\
        {file}{cwd}/src/tests/data/local-file.html\n\
        {file}{cwd}/src/tests/data/local-style.css\n\
        {file}{cwd}/src/tests/data/local-script.js\n\
        ",
        file = file_url_protocol,
        cwd = cwd_normalized
    );
    assert_eq!(stderr, expected_stderr);

    // The process must exit with code 0
    run.assert().code(0);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>>
{
    let cwd = env::current_dir().unwrap();
    // Working directory with forward slashes, as it shows up in file URLs
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    // Absolute path to the fixture, using the platform's separator
    let target_path = if cfg!(windows) {
        format!(
            "{}\\src\\tests\\data\\local-file.html",
            cwd.to_str().unwrap()
        )
    } else {
        format!("{}/src/tests/data/local-file.html", cwd.to_str().unwrap())
    };
    let run = monolith.arg("-jciI").arg(target_path).output().unwrap();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDOUT must carry the fixture with JS, CSS and images stripped and the isolating CSP set
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    let expected_stdout = format!(
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\" src=\"{empty_image}\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"\"></script>\n\n\n\n\
        </body></html>\n\
        ",
        empty_image = empty_image!()
    );
    assert_eq!(stdout, expected_stdout);

    // STDERR must mention the target file and nothing else
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    let expected_stderr = format!(
        "{file}{cwd}/src/tests/data/local-file.html\n",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );
    assert_eq!(stderr, expected_stderr);

    // The process must exit with code 0
    run.assert().code(0);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    // Working directory with forward slashes, as it shows up in file URLs
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // The target is the fixture's file URL; only the scheme prefix differs per platform
    let target_url = format!(
        "{file}{cwd}/src/tests/data/local-file.html",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );
    let run = monolith.arg("-cji").arg(target_url).output().unwrap();

    // STDOUT must carry the fixture with CSS, JS and images stripped
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    let expected_stdout = format!(
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\" src=\"{empty_image}\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"\"></script>\n\n\n\n\
        </body></html>\n\
        ",
        empty_image = empty_image!()
    );
    assert_eq!(stdout, expected_stdout);

    // STDERR must mention the target file URL and nothing else
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    let expected_stderr = format!(
        "{file}{cwd}/src/tests/data/local-file.html\n",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );
    assert_eq!(stderr, expected_stderr);

    // The process must exit with code 0
    run.assert().code(0);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_security_disallow_local_assets_within_data_url_targets(
) -> Result<(), Box<dyn std::error::Error>> {
    // A data URL target that references a script by local path
    let target =
        "data:text/html,%3Cscript%20src=\"src/tests/data/local-script.js\"%3E%3C/script%3E";
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let run = monolith.arg(target).output().unwrap();

    // STDOUT must carry the document with the script source blanked, not embedded
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head><script src=\"\"></script></head><body></body></html>\n"
    );

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process must exit with code 0
    run.assert().code(0);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_embed_file_url_local_asset_within_style_attribute(
) -> Result<(), Box<dyn std::error::Error>> {
    let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    // Temporary SVG that the HTML document will reference
    let mut file_svg = NamedTempFile::new()?;
    writeln!(file_svg, "<svg version=\"1.1\" baseProfile=\"full\" width=\"300\" height=\"200\" xmlns=\"http://www.w3.org/2000/svg\">\
        <rect width=\"100%\" height=\"100%\" fill=\"red\" />\
        <circle cx=\"150\" cy=\"100\" r=\"80\" fill=\"green\" />\
        <text x=\"150\" y=\"125\" font-size=\"60\" text-anchor=\"middle\" fill=\"white\">SVG</text>\
        </svg>\n")?;

    // Temporary HTML whose style attribute points at the SVG through a file URL
    let mut file_html = NamedTempFile::new()?;
    let svg_path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/");
    writeln!(
        file_html,
        "<div style='background-image: url(\"{file}{path}\")'></div>\n",
        file = file_url_prefix,
        path = svg_path,
    )?;
    let run = monolith.arg(file_html.path()).output().unwrap();

    // STDOUT must carry the HTML with a data URL for the background-image
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head></head><body><div style=\"background-image: url('data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSJyZWQiIC8+PGNpcmNsZSBjeD0iMTUwIiBjeT0iMTAwIiByPSI4MCIgZmlsbD0iZ3JlZW4iIC8+PHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+PC9zdmc+Cgo=')\"></div>\n\n</body></html>\n"
    );

    // STDERR must list both temporary files, HTML first
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    let html_path = str!(file_html.path().to_str().unwrap()).replace("\\", "/");
    let expected_stderr = format!(
        "\
        {file}{html_path}\n\
        {file}{svg_path}\n\
        ",
        file = file_url_prefix,
        html_path = html_path,
        svg_path = svg_path,
    );
    assert_eq!(stderr, expected_stderr);

    // The process must exit with code 0
    run.assert().code(0);

    Ok(())
}
|
|
||||||
|
|
||||||
#[test]
fn passing_css_import_string() -> Result<(), Box<dyn std::error::Error>> {
    let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    // Temporary stylesheet that the HTML document will import three different ways
    let mut file_css = NamedTempFile::new()?;
    writeln!(file_css, "body{{background-color:#000;color:#fff}}")?;

    // Temporary HTML with a bare-string @import and two url() @imports
    let mut file_html = NamedTempFile::new()?;
    let css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/");
    writeln!(
        file_html,
        "\
        <style>\n\
        @charset 'UTF-8';\n\
        \n\
        @import '{file}{css_path}';\n\
        \n\
        @import url({file}{css_path});\n\
        \n\
        @import url('{file}{css_path}')\n\
        </style>\n\
        ",
        file = file_url_prefix,
        css_path = css_path,
    )?;
    let run = monolith.arg(file_html.path()).output().unwrap();

    // STDOUT must carry every @import rewritten to an embedded data URL
    let stdout = std::str::from_utf8(&run.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head><style>\n@charset 'UTF-8';\n\n@import 'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K';\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K');\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K')\n</style>\n\n</head><body></body></html>\n"
    );

    // STDERR must list the HTML file once and the stylesheet once per @import
    let stderr = std::str::from_utf8(&run.stderr).unwrap();
    let html_path = str!(file_html.path().to_str().unwrap()).replace("\\", "/");
    let expected_stderr = format!(
        "\
        {file}{html_path}\n\
        {file}{css_path}\n\
        {file}{css_path}\n\
        {file}{css_path}\n\
        ",
        file = file_url_prefix,
        html_path = html_path,
        css_path = css_path,
    );
    assert_eq!(stderr, expected_stderr);

    // The process must exit with code 0
    run.assert().code(0);

    Ok(())
}
|
|
@ -1,317 +0,0 @@
|
|||||||
use reqwest::blocking::Client;
|
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
use crate::css;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
#[test]
fn passing_empty_input() {
    // An empty stylesheet must come back as an empty string
    let mut asset_cache = HashMap::new();
    let http_client = Client::new();

    let embedded = css::embed_css(&mut asset_cache, &http_client, "", "", false, false, false);

    assert_eq!(embedded, "");
}
|
|
||||||
|
|
||||||
#[test]
fn passing_style_exclude_unquoted_images() {
    let mut asset_cache = HashMap::new();
    let http_client = Client::new();

    // Inline style whose url() values are not quoted
    let style = "/* border: none;*/\
        background-image: url(https://somewhere.com/bg.png); \
        list-style: url(/assets/images/bullet.svg);\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)";

    let embedded = css::embed_css(
        &mut asset_cache,
        &http_client,
        "https://doesntmatter.local/",
        style,
        false,
        true,
        true,
    );

    // Both url() values must be replaced by the quoted empty-image placeholder
    let expected = format!(
        "/* border: none;*/\
        background-image: url('{empty_image}'); \
        list-style: url('{empty_image}');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)",
        empty_image = empty_image!()
    );
    assert_eq!(embedded, expected);
}
|
|
||||||
|
|
||||||
#[test]
fn passing_style_exclude_single_quoted_images() {
    let mut asset_cache = HashMap::new();
    let http_client = Client::new();

    // Inline style whose url() values are wrapped in single quotes
    let style = "/* border: none;*/\
        background-image: url('https://somewhere.com/bg.png'); \
        list-style: url('/assets/images/bullet.svg');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)";

    let embedded = css::embed_css(&mut asset_cache, &http_client, "", style, false, true, true);

    // Both url() values must be replaced by the quoted empty-image placeholder
    let expected = format!(
        "/* border: none;*/\
        background-image: url('{empty_image}'); \
        list-style: url('{empty_image}');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)",
        empty_image = empty_image!()
    );
    assert_eq!(embedded, expected);
}
|
|
||||||
|
|
||||||
#[test]
fn passing_style_block() {
    let mut asset_cache = HashMap::new();
    let http_client = Client::new();

    // Stylesheet whose only url() is already a data URL
    let stylesheet = "\
        #id.class-name:not(:nth-child(3n+0)) {\n \
        // border: none;\n \
        background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=');\n\
        }\n\
        \n\
        html > body {}";

    let embedded = css::embed_css(
        &mut asset_cache,
        &http_client,
        "file:///",
        stylesheet,
        false,
        false,
        true,
    );

    // The stylesheet must pass through unchanged
    assert_eq!(embedded, stylesheet);
}
|
|
||||||
|
|
||||||
#[test]
fn passing_attribute_selectors() {
    let mut asset_cache = HashMap::new();
    let http_client = Client::new();

    // One rule per attribute-selector operator; the literal's inner lines stay
    // unindented because their leading whitespace is part of the string
    let stylesheet = "\
[data-value] {
/* Attribute exists */
}

[data-value='foo'] {
/* Attribute has this exact value */
}

[data-value*='foo'] {
/* Attribute value contains this value somewhere in it */
}

[data-value~='foo'] {
/* Attribute has this value in a space-separated list somewhere */
}

[data-value^='foo'] {
/* Attribute value starts with this */
}

[data-value|='foo'] {
/* Attribute value starts with this in a dash-separated list */
}

[data-value$='foo'] {
/* Attribute value ends with this */
}
";

    let embedded = css::embed_css(
        &mut asset_cache,
        &http_client,
        "",
        stylesheet,
        false,
        false,
        false,
    );

    // The stylesheet must pass through unchanged
    assert_eq!(embedded, stylesheet);
}
|
|
||||||
|
|
||||||
#[test]
fn passing_import_string() {
    let mut asset_cache = HashMap::new();
    let http_client = Client::new();

    // Two @import rules pointing at plain (non-base64) CSS data URLs
    let stylesheet = "\
        @charset 'UTF-8';\n\
        \n\
        @import 'data:text/css,html{background-color:%23000}';\n\
        \n\
        @import url('data:text/css,html{color:%23fff}')\n\
        ";

    let embedded = css::embed_css(
        &mut asset_cache,
        &http_client,
        "https://doesntmatter.local/",
        stylesheet,
        false,
        false,
        true,
    );

    // Both imports must come back as base64-encoded data URLs
    let expected = "\
        @charset 'UTF-8';\n\
        \n\
        @import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
        \n\
        @import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
        ";
    assert_eq!(embedded, expected);
}
|
|
||||||
|
|
||||||
#[test]
fn passing_hash_urls() {
    let mut asset_cache = HashMap::new();
    let http_client = Client::new();

    // url() values that consist only of a fragment
    let stylesheet = "\
        body {\n \
        behavior: url(#default#something);\n\
        }\n\
        \n\
        .scissorHalf {\n \
        offset-path: url(#somePath);\n\
        }\n\
        ";

    let embedded = css::embed_css(
        &mut asset_cache,
        &http_client,
        "https://doesntmatter.local/",
        stylesheet,
        false,
        false,
        true,
    );

    // The stylesheet must pass through unchanged
    assert_eq!(embedded, stylesheet);
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_transform_percentages_and_degrees() {
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
const CSS: &str = "\
|
|
||||||
div {\n \
|
|
||||||
transform: translate(-50%, -50%) rotate(-45deg);\n\
|
|
||||||
transform: translate(50%, 50%) rotate(45deg);\n\
|
|
||||||
transform: translate(+50%, +50%) rotate(+45deg);\n\
|
|
||||||
}\n\
|
|
||||||
";
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
css::embed_css(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
"https://doesntmatter.local/",
|
|
||||||
&CSS,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
true,
|
|
||||||
),
|
|
||||||
CSS
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_unusual_indents() {
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
const CSS: &str = "\
|
|
||||||
.is\\:good:hover {\n \
|
|
||||||
color: green\n\
|
|
||||||
}\n\
|
|
||||||
\n\
|
|
||||||
#\\~\\!\\@\\$\\%\\^\\&\\*\\(\\)\\+\\=\\,\\.\\/\\\\\\'\\\"\\;\\:\\?\\>\\<\\[\\]\\{\\}\\|\\`\\# {\n \
|
|
||||||
color: black\n\
|
|
||||||
}\n\
|
|
||||||
";
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
css::embed_css(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
"https://doesntmatter.local/",
|
|
||||||
&CSS,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
true,
|
|
||||||
),
|
|
||||||
CSS
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_exclude_fonts() {
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
const CSS: &str = "\
|
|
||||||
@font-face {\n \
|
|
||||||
font-family: 'My Font';\n \
|
|
||||||
src: url(my_font.woff);\n\
|
|
||||||
}\n\
|
|
||||||
\n\
|
|
||||||
#identifier {\n \
|
|
||||||
font-family: 'My Font' Arial\n\
|
|
||||||
}\n\
|
|
||||||
\n\
|
|
||||||
@font-face {\n \
|
|
||||||
font-family: 'My Font';\n \
|
|
||||||
src: url(my_font.woff);\n\
|
|
||||||
}\n\
|
|
||||||
\n\
|
|
||||||
div {\n \
|
|
||||||
font-family: 'My Font' Verdana\n\
|
|
||||||
}\n\
|
|
||||||
";
|
|
||||||
|
|
||||||
const CSS_OUT: &str = " \
|
|
||||||
\n\
|
|
||||||
\n\
|
|
||||||
#identifier {\n \
|
|
||||||
font-family: 'My Font' Arial\n\
|
|
||||||
}\n\
|
|
||||||
\n \
|
|
||||||
\n\
|
|
||||||
\n\
|
|
||||||
div {\n \
|
|
||||||
font-family: 'My Font' Verdana\n\
|
|
||||||
}\n\
|
|
||||||
";
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
css::embed_css(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
"https://doesntmatter.local/",
|
|
||||||
&CSS,
|
|
||||||
true,
|
|
||||||
false,
|
|
||||||
true,
|
|
||||||
),
|
|
||||||
CSS_OUT
|
|
||||||
);
|
|
||||||
}
|
|
@ -1,50 +0,0 @@
|
|||||||
use crate::css;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_empty_input_single_quotes() {
|
|
||||||
assert_eq!(css::enquote(str!(""), false), "''");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_empty_input_double_quotes() {
|
|
||||||
assert_eq!(css::enquote(str!(""), true), "\"\"");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_apostrophes_single_quotes() {
|
|
||||||
assert_eq!(
|
|
||||||
css::enquote(str!("It's a lovely day, don't you think?"), false),
|
|
||||||
"'It\\'s a lovely day, don\\'t you think?'"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_apostrophes_double_quotes() {
|
|
||||||
assert_eq!(
|
|
||||||
css::enquote(str!("It's a lovely day, don't you think?"), true),
|
|
||||||
"\"It's a lovely day, don't you think?\""
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_feet_and_inches_single_quotes() {
|
|
||||||
assert_eq!(
|
|
||||||
css::enquote(str!("5'2\", 6'5\""), false),
|
|
||||||
"'5\\'2\", 6\\'5\"'"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_feet_and_inches_double_quotes() {
|
|
||||||
assert_eq!(
|
|
||||||
css::enquote(str!("5'2\", 6'5\""), true),
|
|
||||||
"\"5'2\\\", 6'5\\\"\""
|
|
||||||
);
|
|
||||||
}
|
|
@ -1,49 +0,0 @@
|
|||||||
use crate::html;
|
|
||||||
use html5ever::rcdom::{Handle, NodeData};
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn get_node_name() {
|
|
||||||
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
let mut count = 0;
|
|
||||||
|
|
||||||
fn test_walk(node: &Handle, i: &mut i8) {
|
|
||||||
*i += 1;
|
|
||||||
|
|
||||||
match &node.data {
|
|
||||||
NodeData::Document => {
|
|
||||||
for child in node.children.borrow().iter() {
|
|
||||||
test_walk(child, &mut *i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
NodeData::Element { ref name, .. } => {
|
|
||||||
let node_name = name.local.as_ref().to_string();
|
|
||||||
let parent = html::get_parent_node(node);
|
|
||||||
let parent_node_name = html::get_node_name(&parent);
|
|
||||||
if node_name == "head" || node_name == "body" {
|
|
||||||
assert_eq!(parent_node_name, Some("html"));
|
|
||||||
} else if node_name == "div" {
|
|
||||||
assert_eq!(parent_node_name, Some("body"));
|
|
||||||
} else if node_name == "p" {
|
|
||||||
assert_eq!(parent_node_name, Some("div"));
|
|
||||||
}
|
|
||||||
|
|
||||||
for child in node.children.borrow().iter() {
|
|
||||||
test_walk(child, &mut *i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => (),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
test_walk(&dom.document, &mut count);
|
|
||||||
|
|
||||||
assert_eq!(count, 7);
|
|
||||||
}
|
|
@ -1,4 +0,0 @@
|
|||||||
mod get_node_name;
|
|
||||||
mod is_icon;
|
|
||||||
mod stringify_document;
|
|
||||||
mod walk_and_embed_assets;
|
|
@ -1,188 +0,0 @@
|
|||||||
use crate::html;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_div_as_root_element() {
|
|
||||||
let html = "<div><script src=\"some.js\"></script></div>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
|
|
||||||
let opt_no_css: bool = false;
|
|
||||||
let opt_no_frames: bool = false;
|
|
||||||
let opt_no_js: bool = false;
|
|
||||||
let opt_no_images: bool = false;
|
|
||||||
let opt_isolate: bool = false;
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
html::stringify_document(
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_isolate,
|
|
||||||
),
|
|
||||||
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_full_page_with_no_html_head_or_body() {
|
|
||||||
let html = "<title>Isolated document</title>\
|
|
||||||
<link rel=\"something\" href=\"some.css\" />\
|
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
|
||||||
<div><script src=\"some.js\"></script></div>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
|
|
||||||
let opt_no_css: bool = false;
|
|
||||||
let opt_no_frames: bool = false;
|
|
||||||
let opt_no_js: bool = false;
|
|
||||||
let opt_no_images: bool = false;
|
|
||||||
let opt_isolate: bool = true;
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
html::stringify_document(
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_isolate,
|
|
||||||
),
|
|
||||||
"<html>\
|
|
||||||
<head>\
|
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
|
|
||||||
<title>Isolated document</title>\
|
|
||||||
<link rel=\"something\" href=\"some.css\">\
|
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
|
||||||
</head>\
|
|
||||||
<body>\
|
|
||||||
<div>\
|
|
||||||
<script src=\"some.js\"></script>\
|
|
||||||
</div>\
|
|
||||||
</body>\
|
|
||||||
</html>"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_doctype_and_the_rest_no_html_head_or_body() {
|
|
||||||
let html = "<!doctype html>\
|
|
||||||
<title>Unstyled document</title>\
|
|
||||||
<link rel=\"stylesheet\" href=\"main.css\"/>\
|
|
||||||
<div style=\"display: none;\"></div>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
|
|
||||||
let opt_no_css: bool = true;
|
|
||||||
let opt_no_frames: bool = false;
|
|
||||||
let opt_no_js: bool = false;
|
|
||||||
let opt_no_images: bool = false;
|
|
||||||
let opt_isolate: bool = false;
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
html::stringify_document(
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_isolate,
|
|
||||||
),
|
|
||||||
"<!DOCTYPE html>\
|
|
||||||
<html>\
|
|
||||||
<head>\
|
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
|
|
||||||
<title>Unstyled document</title>\
|
|
||||||
<link rel=\"stylesheet\" href=\"main.css\">\
|
|
||||||
</head>\
|
|
||||||
<body><div style=\"display: none;\"></div></body>\
|
|
||||||
</html>"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_doctype_and_the_rest_no_html_head_or_body_forbid_frames() {
|
|
||||||
let html = "<!doctype html>\
|
|
||||||
<title>Frameless document</title>\
|
|
||||||
<link rel=\"something\"/>\
|
|
||||||
<div><script src=\"some.js\"></script></div>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
|
|
||||||
let opt_no_css: bool = false;
|
|
||||||
let opt_no_frames: bool = true;
|
|
||||||
let opt_no_js: bool = false;
|
|
||||||
let opt_no_images: bool = false;
|
|
||||||
let opt_isolate: bool = false;
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
html::stringify_document(
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_isolate,
|
|
||||||
),
|
|
||||||
"<!DOCTYPE html>\
|
|
||||||
<html>\
|
|
||||||
<head>\
|
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
|
|
||||||
<title>Frameless document</title>\
|
|
||||||
<link rel=\"something\">\
|
|
||||||
</head>\
|
|
||||||
<body><div><script src=\"some.js\"></script></div></body>\
|
|
||||||
</html>"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_doctype_and_the_rest_all_forbidden() {
|
|
||||||
let html = "<!doctype html>\
|
|
||||||
<title>no-frame no-css no-js no-image isolated document</title>\
|
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
|
||||||
<link rel=\"stylesheet\" href=\"some.css\">\
|
|
||||||
<div>\
|
|
||||||
<script src=\"some.js\"></script>\
|
|
||||||
<img style=\"width: 100%;\" src=\"some.png\" />\
|
|
||||||
<iframe src=\"some.html\"></iframe>\
|
|
||||||
</div>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
|
|
||||||
let opt_isolate: bool = true;
|
|
||||||
let opt_no_css: bool = true;
|
|
||||||
let opt_no_frames: bool = true;
|
|
||||||
let opt_no_js: bool = true;
|
|
||||||
let opt_no_images: bool = true;
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
html::stringify_document(
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_isolate,
|
|
||||||
),
|
|
||||||
"<!DOCTYPE html>\
|
|
||||||
<html>\
|
|
||||||
<head>\
|
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; frame-src 'none';child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
|
|
||||||
<title>no-frame no-css no-js no-image isolated document</title>\
|
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
|
||||||
<link rel=\"stylesheet\" href=\"some.css\">\
|
|
||||||
</head>\
|
|
||||||
<body>\
|
|
||||||
<div>\
|
|
||||||
<script src=\"some.js\"></script>\
|
|
||||||
<img style=\"width: 100%;\" src=\"some.png\">\
|
|
||||||
<iframe src=\"some.html\"></iframe>\
|
|
||||||
</div>\
|
|
||||||
</body>\
|
|
||||||
</html>"
|
|
||||||
);
|
|
||||||
}
|
|
@ -1,419 +0,0 @@
|
|||||||
use crate::html;
|
|
||||||
use html5ever::serialize::{serialize, SerializeOpts};
|
|
||||||
use reqwest::blocking::Client;
|
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_basic() {
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
|
|
||||||
let html = "<div><P></P></div>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
let url = "http://localhost";
|
|
||||||
|
|
||||||
let opt_no_css: bool = false;
|
|
||||||
let opt_no_fonts: bool = false;
|
|
||||||
let opt_no_frames: bool = false;
|
|
||||||
let opt_no_js: bool = false;
|
|
||||||
let opt_no_images: bool = false;
|
|
||||||
let opt_silent = true;
|
|
||||||
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
html::walk_and_embed_assets(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
&url,
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
|
||||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
|
||||||
"<html><head></head><body><div><p></p></div></body></html>"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_ensure_no_recursive_iframe() {
|
|
||||||
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
let url = "http://localhost";
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
|
|
||||||
let opt_no_css: bool = false;
|
|
||||||
let opt_no_fonts: bool = false;
|
|
||||||
let opt_no_frames: bool = false;
|
|
||||||
let opt_no_js: bool = false;
|
|
||||||
let opt_no_images: bool = false;
|
|
||||||
let opt_silent = true;
|
|
||||||
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
html::walk_and_embed_assets(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
&url,
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
|
||||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
|
||||||
"<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_ensure_no_recursive_frame() {
|
|
||||||
let html = "<frameset><frame src=\"\"></frameset>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
let url = "http://localhost";
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
|
|
||||||
let opt_no_css: bool = false;
|
|
||||||
let opt_no_fonts: bool = false;
|
|
||||||
let opt_no_frames: bool = false;
|
|
||||||
let opt_no_js: bool = false;
|
|
||||||
let opt_no_images: bool = false;
|
|
||||||
let opt_silent = true;
|
|
||||||
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
html::walk_and_embed_assets(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
&url,
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
|
||||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
|
||||||
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_no_css() {
|
|
||||||
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
|
|
||||||
<style>html{background-color: #000;}</style>\
|
|
||||||
<div style=\"display: none;\"></div>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
let url = "http://localhost";
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
|
|
||||||
let opt_no_css: bool = true;
|
|
||||||
let opt_no_fonts: bool = false;
|
|
||||||
let opt_no_frames: bool = false;
|
|
||||||
let opt_no_js: bool = false;
|
|
||||||
let opt_no_images: bool = false;
|
|
||||||
let opt_silent = true;
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
html::walk_and_embed_assets(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
&url,
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
|
||||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
|
||||||
"<html>\
|
|
||||||
<head>\
|
|
||||||
<link rel=\"stylesheet\" href=\"\">\
|
|
||||||
<style></style>\
|
|
||||||
</head>\
|
|
||||||
<body>\
|
|
||||||
<div></div>\
|
|
||||||
</body>\
|
|
||||||
</html>"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_no_images() {
|
|
||||||
let html = "<link rel=\"icon\" href=\"favicon.ico\">\
|
|
||||||
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
let url = "http://localhost";
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
|
|
||||||
let opt_no_css: bool = false;
|
|
||||||
let opt_no_fonts: bool = false;
|
|
||||||
let opt_no_frames: bool = false;
|
|
||||||
let opt_no_js: bool = false;
|
|
||||||
let opt_no_images: bool = true;
|
|
||||||
let opt_silent = true;
|
|
||||||
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
html::walk_and_embed_assets(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
&url,
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
|
||||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
|
||||||
format!(
|
|
||||||
"<html>\
|
|
||||||
<head>\
|
|
||||||
<link rel=\"icon\" href=\"\">\
|
|
||||||
</head>\
|
|
||||||
<body>\
|
|
||||||
<div>\
|
|
||||||
<img src=\"{empty_image}\">\
|
|
||||||
</div>\
|
|
||||||
</body>\
|
|
||||||
</html>",
|
|
||||||
empty_image = empty_image!()
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_no_body_background_images() {
|
|
||||||
let html = "<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
let url = "http://localhost";
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
|
|
||||||
let opt_no_css: bool = false;
|
|
||||||
let opt_no_fonts: bool = false;
|
|
||||||
let opt_no_frames: bool = false;
|
|
||||||
let opt_no_js: bool = false;
|
|
||||||
let opt_no_images: bool = true;
|
|
||||||
let opt_silent = true;
|
|
||||||
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
html::walk_and_embed_assets(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
&url,
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
|
||||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
|
||||||
"<html><head></head><body></body></html>"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_no_frames() {
|
|
||||||
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
let url = "http://localhost";
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
|
|
||||||
let opt_no_css: bool = false;
|
|
||||||
let opt_no_fonts: bool = false;
|
|
||||||
let opt_no_frames: bool = true;
|
|
||||||
let opt_no_js: bool = false;
|
|
||||||
let opt_no_images: bool = false;
|
|
||||||
let opt_silent = true;
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
html::walk_and_embed_assets(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
&url,
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
|
||||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
|
||||||
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_no_iframes() {
|
|
||||||
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
let url = "http://localhost";
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
|
|
||||||
let opt_no_css: bool = false;
|
|
||||||
let opt_no_fonts: bool = false;
|
|
||||||
let opt_no_frames: bool = true;
|
|
||||||
let opt_no_js: bool = false;
|
|
||||||
let opt_no_images: bool = false;
|
|
||||||
let opt_silent = true;
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
html::walk_and_embed_assets(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
&url,
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
|
||||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
|
||||||
"<html><head></head><body><iframe src=\"\"></iframe></body></html>"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_no_js() {
|
|
||||||
let html = "<div onClick=\"void(0)\">\
|
|
||||||
<script src=\"http://localhost/assets/some.js\"></script>\
|
|
||||||
<script>alert(1)</script>\
|
|
||||||
</div>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
let url = "http://localhost";
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
|
|
||||||
let opt_no_css: bool = false;
|
|
||||||
let opt_no_fonts: bool = false;
|
|
||||||
let opt_no_frames: bool = false;
|
|
||||||
let opt_no_js: bool = true;
|
|
||||||
let opt_no_images: bool = false;
|
|
||||||
let opt_silent = true;
|
|
||||||
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
html::walk_and_embed_assets(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
&url,
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
|
||||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
|
||||||
"<html><head></head><body><div><script src=\"\"></script>\
|
|
||||||
<script></script></div></body></html>"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_with_no_integrity() {
|
|
||||||
let html = "<title>No integrity</title>\
|
|
||||||
<link integrity=\"sha384-...\" rel=\"something\"/>\
|
|
||||||
<script integrity=\"sha384-...\" src=\"some.js\"></script>";
|
|
||||||
let dom = html::html_to_dom(&html);
|
|
||||||
let url = "http://localhost";
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
let client = Client::new();
|
|
||||||
let opt_no_css: bool = true;
|
|
||||||
let opt_no_fonts: bool = false;
|
|
||||||
let opt_no_frames: bool = true;
|
|
||||||
let opt_no_js: bool = true;
|
|
||||||
let opt_no_images: bool = true;
|
|
||||||
let opt_silent = true;
|
|
||||||
|
|
||||||
html::walk_and_embed_assets(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
&url,
|
|
||||||
&dom.document,
|
|
||||||
opt_no_css,
|
|
||||||
opt_no_fonts,
|
|
||||||
opt_no_frames,
|
|
||||||
opt_no_js,
|
|
||||||
opt_no_images,
|
|
||||||
opt_silent,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
|
||||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
|
||||||
"<html>\
|
|
||||||
<head><title>No integrity</title><link rel=\"something\"><script src=\"\"></script></head>\
|
|
||||||
<body></body>\
|
|
||||||
</html>"
|
|
||||||
);
|
|
||||||
}
|
|
@ -1,40 +0,0 @@
|
|||||||
use crate::utils;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_removes_fragment() {
|
|
||||||
assert_eq!(
|
|
||||||
utils::clean_url("https://somewhere.com/font.eot#iefix"),
|
|
||||||
"https://somewhere.com/font.eot"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_removes_empty_fragment() {
|
|
||||||
assert_eq!(
|
|
||||||
utils::clean_url("https://somewhere.com/font.eot#"),
|
|
||||||
"https://somewhere.com/font.eot"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_removes_empty_query_and_empty_fragment() {
|
|
||||||
assert_eq!(
|
|
||||||
utils::clean_url("https://somewhere.com/font.eot?#"),
|
|
||||||
"https://somewhere.com/font.eot"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_removes_empty_query_amp_and_empty_fragment() {
|
|
||||||
assert_eq!(
|
|
||||||
utils::clean_url("https://somewhere.com/font.eot?a=b&#"),
|
|
||||||
"https://somewhere.com/font.eot?a=b"
|
|
||||||
);
|
|
||||||
}
|
|
@ -1,95 +0,0 @@
|
|||||||
use crate::utils;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_parse_text_html_base64() {
|
|
||||||
let (media_type, text) = utils::data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
|
|
||||||
|
|
||||||
assert_eq!(media_type, "text/html");
|
|
||||||
assert_eq!(
|
|
||||||
text,
|
|
||||||
"Work expands so as to fill the time available for its completion"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_parse_text_html_utf8() {
|
|
||||||
let (media_type, text) = utils::data_url_to_text(
|
|
||||||
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(media_type, "text/html");
|
|
||||||
assert_eq!(
|
|
||||||
text,
|
|
||||||
"Work expands so as to fill the time available for its completion"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_parse_text_html_plaintext() {
|
|
||||||
let (media_type, text) = utils::data_url_to_text(
|
|
||||||
"data:text/html,Work expands so as to fill the time available for its completion",
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(media_type, "text/html");
|
|
||||||
assert_eq!(
|
|
||||||
text,
|
|
||||||
"Work expands so as to fill the time available for its completion"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
|
|
||||||
let (media_type, text) = utils::data_url_to_text(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
|
|
||||||
|
|
||||||
assert_eq!(media_type, "text/html");
|
|
||||||
assert_eq!(
|
|
||||||
text,
|
|
||||||
"Work expands so as to fill the time available for its completion"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_parse_text_css_url_encoded() {
|
|
||||||
let (media_type, text) = utils::data_url_to_text("data:text/css,div{background-color:%23000}");
|
|
||||||
|
|
||||||
assert_eq!(media_type, "text/css");
|
|
||||||
assert_eq!(text, "div{background-color:#000}");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_parse_no_media_type_base64() {
|
|
||||||
let (media_type, text) = utils::data_url_to_text("data:;base64,dGVzdA==");
|
|
||||||
|
|
||||||
assert_eq!(media_type, "");
|
|
||||||
assert_eq!(text, "test");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_parse_no_media_type_no_encoding() {
|
|
||||||
let (media_type, text) = utils::data_url_to_text("data:;,test%20test");
|
|
||||||
|
|
||||||
assert_eq!(media_type, "");
|
|
||||||
assert_eq!(text, "test test");
|
|
||||||
}
|
|
||||||
|
|
||||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
|
||||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
/// The bare word `data` is not a data URL: both returned strings are empty.
#[test]
fn failing_just_word_data() {
    let input = "data";
    let (media_type, text) = utils::data_url_to_text(input);

    assert_eq!(media_type, "");
    assert_eq!(text, "");
}
|
|
@ -1,36 +0,0 @@
|
|||||||
use crate::utils;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
/// Percent-encoded multi-byte UTF-8 sequences decode to the original characters.
#[test]
fn passing_decode_unicode_characters() {
    let encoded = str!(
        "%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
    );
    let decoded = utils::decode_url(encoded);

    assert_eq!(decoded, "検ヒム解塗ゃッ = サ");
}
|
|
||||||
|
|
||||||
/// `%20` and `%23` inside a `file://` URL decode to a space and `#`.
#[test]
fn passing_decode_file_url() {
    let encoded = str!("file:///tmp/space%20here/test%231.html");
    let decoded = utils::decode_url(encoded);

    assert_eq!(decoded, "file:///tmp/space here/test#1.html");
}
|
|
||||||
|
|
||||||
/// A literal `+` in the query string is left untouched by decoding.
#[test]
fn passing_plus_sign() {
    let encoded = str!(
        "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
    );
    let decoded = utils::decode_url(encoded);

    assert_eq!(
        decoded,
        "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
    );
}
|
|
@ -1,147 +0,0 @@
|
|||||||
use crate::utils;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
/// Data starting with `GIF87a` is reported as `image/gif`.
#[test]
fn passing_image_gif87() {
    let detected = utils::detect_media_type(b"GIF87a", "");

    assert_eq!(detected, "image/gif");
}
|
|
||||||
|
|
||||||
/// Data starting with `GIF89a` is reported as `image/gif`.
#[test]
fn passing_image_gif89() {
    let detected = utils::detect_media_type(b"GIF89a", "");

    assert_eq!(detected, "image/gif");
}
|
|
||||||
|
|
||||||
/// Data starting with bytes `FF D8 FF` is reported as `image/jpeg`.
#[test]
fn passing_image_jpeg() {
    let detected = utils::detect_media_type(b"\xFF\xD8\xFF", "");

    assert_eq!(detected, "image/jpeg");
}
|
|
||||||
|
|
||||||
/// Data starting with the eight-byte PNG signature is reported as `image/png`.
#[test]
fn passing_image_png() {
    let detected = utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", "");

    assert_eq!(detected, "image/png");
}
|
|
||||||
|
|
||||||
/// Data starting with `<svg ` is reported as `image/svg+xml`.
#[test]
fn passing_image_svg() {
    let detected = utils::detect_media_type(b"<svg ", "");

    assert_eq!(detected, "image/svg+xml");
}
|
|
||||||
|
|
||||||
/// Data starting with `RIFF....WEBPVP8 ` is reported as `image/webp`.
#[test]
fn passing_image_webp() {
    let detected = utils::detect_media_type(b"RIFF....WEBPVP8 ", "");

    assert_eq!(detected, "image/webp");
}
|
|
||||||
|
|
||||||
/// Data starting with bytes `00 00 01 00` is reported as `image/x-icon`.
#[test]
fn passing_image_icon() {
    let detected = utils::detect_media_type(b"\x00\x00\x01\x00", "");

    assert_eq!(detected, "image/x-icon");
}
|
|
||||||
|
|
||||||
/// An `<?xml ` header paired with a `.svg` file name is reported as `image/svg+xml`.
#[test]
fn passing_image_svg_filename() {
    let detected = utils::detect_media_type(b"<?xml ", "local-file.svg");

    assert_eq!(detected, "image/svg+xml");
}
|
|
||||||
|
|
||||||
/// With no data at all, an upper-case `.SVG` URL suffix is still reported as `image/svg+xml`.
#[test]
fn passing_image_svg_url_uppercase() {
    let url = "https://some-site.com/images/local-file.SVG";
    let detected = utils::detect_media_type(b"", url);

    assert_eq!(detected, "image/svg+xml");
}
|
|
||||||
|
|
||||||
/// Data starting with `ID3` is reported as `audio/mpeg`.
#[test]
fn passing_audio_mpeg() {
    let detected = utils::detect_media_type(b"ID3", "");

    assert_eq!(detected, "audio/mpeg");
}
|
|
||||||
|
|
||||||
/// Data starting with bytes `FF 0E` is reported as `audio/mpeg`.
#[test]
fn passing_audio_mpeg_2() {
    let detected = utils::detect_media_type(b"\xFF\x0E", "");

    assert_eq!(detected, "audio/mpeg");
}
|
|
||||||
|
|
||||||
/// Data starting with bytes `FF 0F` is reported as `audio/mpeg`.
#[test]
fn passing_audio_mpeg_3() {
    let detected = utils::detect_media_type(b"\xFF\x0F", "");

    assert_eq!(detected, "audio/mpeg");
}
|
|
||||||
|
|
||||||
/// Data starting with `OggS` is reported as `audio/ogg`.
#[test]
fn passing_audio_ogg() {
    let detected = utils::detect_media_type(b"OggS", "");

    assert_eq!(detected, "audio/ogg");
}
|
|
||||||
|
|
||||||
/// Data starting with `RIFF....WAVEfmt ` is reported as `audio/wav`.
#[test]
fn passing_audio_wav() {
    let detected = utils::detect_media_type(b"RIFF....WAVEfmt ", "");

    assert_eq!(detected, "audio/wav");
}
|
|
||||||
|
|
||||||
/// Data starting with `fLaC` is reported as `audio/x-flac`.
#[test]
fn passing_audio_flac() {
    let detected = utils::detect_media_type(b"fLaC", "");

    assert_eq!(detected, "audio/x-flac");
}
|
|
||||||
|
|
||||||
/// Data starting with `RIFF....AVI LIST` is reported as `video/avi`.
#[test]
fn passing_video_avi() {
    let detected = utils::detect_media_type(b"RIFF....AVI LIST", "");

    assert_eq!(detected, "video/avi");
}
|
|
||||||
|
|
||||||
/// Data starting with `....ftyp` is reported as `video/mp4`.
#[test]
fn passing_video_mp4() {
    let detected = utils::detect_media_type(b"....ftyp", "");

    assert_eq!(detected, "video/mp4");
}
|
|
||||||
|
|
||||||
/// Data starting with bytes `00 00 01 0B` is reported as `video/mpeg`.
#[test]
fn passing_video_mpeg() {
    let detected = utils::detect_media_type(b"\x00\x00\x01\x0B", "");

    assert_eq!(detected, "video/mpeg");
}
|
|
||||||
|
|
||||||
/// Data starting with `....moov` is reported as `video/quicktime`.
#[test]
fn passing_video_quicktime() {
    let detected = utils::detect_media_type(b"....moov", "");

    assert_eq!(detected, "video/quicktime");
}
|
|
||||||
|
|
||||||
/// Data starting with bytes `1A 45 DF A3` is reported as `video/webm`.
#[test]
fn passing_video_webm() {
    let detected = utils::detect_media_type(b"\x1A\x45\xDF\xA3", "");

    assert_eq!(detected, "video/webm");
}
|
|
||||||
|
|
||||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
|
||||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
/// Data with no recognised signature and no URL hint yields an empty media type.
#[test]
fn failing_unknown_media_type() {
    let detected = utils::detect_media_type(b"abcdef0123456789", "");

    assert_eq!(detected, "");
}
|
|
@ -1,44 +0,0 @@
|
|||||||
use crate::utils;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
/// A base64 `text/html` data URL is recognised as a data URL.
#[test]
fn passing_data_url_text_html() {
    let url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let verdict = utils::is_data_url(url);

    assert!(verdict);
}
|
|
||||||
|
|
||||||
/// A base64 data URL without a media type is still recognised as a data URL.
#[test]
fn passing_data_url_no_media_type() {
    let url = "data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let verdict = utils::is_data_url(url);

    assert!(verdict);
}
|
|
||||||
|
|
||||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
|
||||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
/// An `https://` URL is not a data URL.
#[test]
fn failing_https_url() {
    let verdict = utils::is_data_url("https://kernel.org");

    assert!(!verdict);
}
|
|
||||||
|
|
||||||
/// A protocol-relative URL (`//host`) is not a data URL.
#[test]
fn failing_no_protocol_url() {
    let verdict = utils::is_data_url("//kernel.org");

    assert!(!verdict);
}
|
|
||||||
|
|
||||||
/// The empty string is not a data URL.
#[test]
fn failing_empty_string() {
    let verdict = utils::is_data_url("");

    assert!(!verdict);
}
|
|
@ -1,13 +0,0 @@
|
|||||||
mod clean_url;
|
|
||||||
mod data_to_data_url;
|
|
||||||
mod data_url_to_text;
|
|
||||||
mod decode_url;
|
|
||||||
mod detect_media_type;
|
|
||||||
mod file_url_to_fs_path;
|
|
||||||
mod get_url_fragment;
|
|
||||||
mod is_data_url;
|
|
||||||
mod is_file_url;
|
|
||||||
mod is_http_url;
|
|
||||||
mod resolve_url;
|
|
||||||
mod retrieve_asset;
|
|
||||||
mod url_has_protocol;
|
|
@ -1,229 +0,0 @@
|
|||||||
use url::ParseError;
|
|
||||||
|
|
||||||
use crate::utils;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
/// Resolving a `../` path against a bare https origin yields a path directly under that origin.
#[test]
fn passing_from_https_to_level_up_relative() -> Result<(), ParseError> {
    let base = "https://www.kernel.org";
    let target = "../category/signatures.html";

    let resolved_url = utils::resolve_url(base, target)?;
    assert_eq!(
        resolved_url.as_str(),
        "https://www.kernel.org/category/signatures.html"
    );

    Ok(())
}
|
|
||||||
|
|
||||||
/// A full https target is returned unchanged even when the base is only a file name.
#[test]
fn passing_from_just_filename_to_full_https_url() -> Result<(), ParseError> {
    let base = "saved_page.htm";
    let target = "https://www.kernel.org/category/signatures.html";

    let resolved_url = utils::resolve_url(base, target)?;
    assert_eq!(
        resolved_url.as_str(),
        "https://www.kernel.org/category/signatures.html"
    );

    Ok(())
}
|
|
||||||
|
|
||||||
/// A protocol-relative target picks up the `https` scheme of the base.
#[test]
fn passing_from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> {
    let base = "https://www.kernel.org";
    let target = "//www.kernel.org/theme/images/logos/tux.png";

    let resolved_url = utils::resolve_url(base, target)?;
    assert_eq!(
        resolved_url.as_str(),
        "https://www.kernel.org/theme/images/logos/tux.png"
    );

    Ok(())
}
|
|
||||||
|
|
||||||
/// A protocol-relative target on another host keeps its own host and takes the base's scheme.
#[test]
fn passing_from_https_url_to_url_with_no_protocol_and_on_different_hostname(
) -> Result<(), ParseError> {
    let base = "https://www.kernel.org";
    let target = "//another-host.org/theme/images/logos/tux.png";

    let resolved_url = utils::resolve_url(base, target)?;
    assert_eq!(
        resolved_url.as_str(),
        "https://another-host.org/theme/images/logos/tux.png"
    );

    Ok(())
}
|
|
||||||
|
|
||||||
/// A root-relative target replaces the whole path of the base URL.
#[test]
fn passing_from_https_url_to_relative_root_path() -> Result<(), ParseError> {
    let base = "https://www.kernel.org/category/signatures.html";
    let target = "/theme/images/logos/tux.png";

    let resolved_url = utils::resolve_url(base, target)?;
    assert_eq!(
        resolved_url.as_str(),
        "https://www.kernel.org/theme/images/logos/tux.png"
    );

    Ok(())
}
|
|
||||||
|
|
||||||
/// A bare file name replaces the last path segment of the base URL.
#[test]
fn passing_from_https_to_just_filename() -> Result<(), ParseError> {
    let base = "https://www.w3schools.com/html/html_iframe.asp";
    let target = "default.asp";

    let resolved_url = utils::resolve_url(base, target)?;
    assert_eq!(
        resolved_url.as_str(),
        "https://www.w3schools.com/html/default.asp"
    );

    Ok(())
}
|
|
||||||
|
|
||||||
/// A full https target is returned unchanged when the base is a data URL.
#[test]
fn passing_from_data_url_to_https() -> Result<(), ParseError> {
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "https://www.kernel.org/category/signatures.html";

    let resolved_url = utils::resolve_url(base, target)?;
    assert_eq!(
        resolved_url.as_str(),
        "https://www.kernel.org/category/signatures.html"
    );

    Ok(())
}
|
|
||||||
|
|
||||||
/// When both base and target are data URLs, the target data URL is the result.
#[test]
fn passing_from_data_url_to_data_url() -> Result<(), ParseError> {
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K";

    let resolved_url = utils::resolve_url(base, target)?;
    assert_eq!(
        resolved_url.as_str(),
        "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
    );

    Ok(())
}
|
|
||||||
|
|
||||||
/// A relative path resolves against the directory of a `file://` base URL.
#[test]
fn passing_from_file_url_to_relative_path() -> Result<(), ParseError> {
    let base = "file:///home/user/Websites/my-website/index.html";
    let target = "assets/images/logo.png";

    // A resolution error is turned into an empty string so the assertion below reports it
    let resolved_url = utils::resolve_url(base, target).unwrap_or(str!());
    assert_eq!(
        resolved_url.as_str(),
        "file:///home/user/Websites/my-website/assets/images/logo.png"
    );

    Ok(())
}
|
|
||||||
|
|
||||||
/// Backslash-separated base and target resolve to the same forward-slash `file://` URL.
#[test]
fn passing_from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> {
    let base = "file:\\\\\\home\\user\\Websites\\my-website\\index.html";
    let target = "assets\\images\\logo.png";

    // A resolution error is turned into an empty string so the assertion below reports it
    let resolved_url = utils::resolve_url(base, target).unwrap_or(str!());
    assert_eq!(
        resolved_url.as_str(),
        "file:///home/user/Websites/my-website/assets/images/logo.png"
    );

    Ok(())
}
|
|
||||||
|
|
||||||
/// A `file://` target is returned unchanged when the base is a data URL.
#[test]
fn passing_from_data_url_to_file_url() -> Result<(), ParseError> {
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "file:///etc/passwd";

    let resolved_url = utils::resolve_url(base, target).unwrap_or(str!());
    assert_eq!(resolved_url.as_str(), "file:///etc/passwd");

    Ok(())
}
|
|
||||||
|
|
||||||
/// The `#fragment` of a relative target survives resolution.
#[test]
fn passing_preserve_fragment() -> Result<(), ParseError> {
    let base = "http://doesnt-matter.local/";
    let target = "css/fonts/fontmarvelous.svg#fontmarvelous";

    let resolved_url = utils::resolve_url(base, target).unwrap_or(str!());
    assert_eq!(
        resolved_url.as_str(),
        "http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
    );

    Ok(())
}
|
|
||||||
|
|
||||||
/// A full `file://` target is returned unchanged for a `file://` base; the paths used differ
/// between Windows and other platforms.
#[test]
fn passing_resolve_from_file_url_to_file_url() -> Result<(), ParseError> {
    // Pick the platform-specific fixture once instead of branching twice
    let (base, target, expected) = if cfg!(windows) {
        (
            "file:///c:/index.html",
            "file:///c:/image.png",
            "file:///c:/image.png",
        )
    } else {
        (
            "file:///tmp/index.html",
            "file:///tmp/image.png",
            "file:///tmp/image.png",
        )
    };

    let resolved_url = utils::resolve_url(base, target).unwrap_or(str!());
    assert_eq!(resolved_url.as_str(), expected);

    Ok(())
}
|
|
||||||
|
|
||||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
|
||||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
/// A protocol-relative target cannot be resolved against a data URL base; the error is mapped to
/// an empty string, which is what the test expects.
#[test]
fn failing_from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> {
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "//www.w3schools.com/html/html_iframe.asp";

    let resolved_url = utils::resolve_url(base, target).unwrap_or(str!());
    assert_eq!(resolved_url.as_str(), "");

    Ok(())
}
|
|
@ -1,137 +0,0 @@
|
|||||||
use crate::utils;
|
|
||||||
use reqwest::blocking::Client;
|
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::env;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
/// If both source and target are data URLs, the result must contain the target data URL.
#[test]
fn passing_read_data_url() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let parent_url = "data:text/html;base64,SoUrCe";
    let target_url = "data:text/html;base64,TaRgEt";

    let (retrieved_data, final_url) =
        utils::retrieve_asset(&mut cache, &client, parent_url, target_url, true, "", false)
            .unwrap();

    assert_eq!(&retrieved_data, "data:text/html;base64,TaRgEt");
    assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
}
|
|
||||||
|
|
||||||
/// The media type argument (`image/png` here) must not influence a data URL target.
#[test]
fn passing_read_data_url_ignore_suggested_media_type() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let parent_url = "data:text/html;base64,SoUrCe";
    let target_url = "data:text/html;base64,TaRgEt";

    let (data, final_url) = utils::retrieve_asset(
        &mut cache,
        &client,
        parent_url,
        target_url,
        true,
        "image/png",
        false,
    )
    .unwrap();

    assert_eq!(&data, "data:text/html;base64,TaRgEt");
    assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
}
|
|
||||||
|
|
||||||
/// Inclusion of local assets from local sources should be allowed: a `file://` script referenced
/// from a `file://` page is returned as a base64 data URL, and the final URL is the script's URL.
#[test]
fn passing_read_local_file_with_file_url_parent() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // Both URLs point into the fixture directory under the current working directory
    let cwd = env::current_dir().unwrap();
    let cwd_str = cwd.to_str().unwrap();
    let parent_url = format!(
        "{file}{cwd}/src/tests/data/local-file.html",
        file = file_url_protocol,
        cwd = cwd_str
    );
    let script_url = format!(
        "{file}{cwd}/src/tests/data/local-script.js",
        file = file_url_protocol,
        cwd = cwd_str
    );

    let (data, final_url) = utils::retrieve_asset(
        &mut cache,
        &client,
        &parent_url,
        &script_url,
        true,
        "application/javascript",
        false,
    )
    .unwrap();

    assert_eq!(&data, "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
    assert_eq!(&final_url, &script_url);
}
|
|
||||||
|
|
||||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
|
||||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
/// Inclusion of local assets from data URL sources should not be allowed: both returned strings
/// are expected to be empty.
#[test]
fn failing_read_local_file_with_data_url_parent() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let parent_url = "data:text/html;base64,SoUrCe";
    let target_url = "file:///etc/passwd";

    let (data, final_url) =
        utils::retrieve_asset(&mut cache, &client, parent_url, target_url, true, "", false)
            .unwrap();

    assert_eq!(&data, "");
    assert_eq!(&final_url, "");
}
|
|
||||||
|
|
||||||
/// Inclusion of local assets from remote sources should not be allowed: both returned strings
/// are expected to be empty.
#[test]
fn failing_read_local_file_with_https_parent() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let parent_url = "https://kernel.org/";
    let target_url = "file:///etc/passwd";

    let (data, final_url) =
        utils::retrieve_asset(&mut cache, &client, parent_url, target_url, true, "", false)
            .unwrap();

    assert_eq!(&data, "");
    assert_eq!(&final_url, "");
}
|
|
@ -1,83 +0,0 @@
|
|||||||
use crate::utils;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
/// A `mailto:` link counts as having a protocol.
#[test]
fn passing_mailto() {
    let verdict = utils::url_has_protocol("mailto:somebody@somewhere.com?subject=hello");

    assert!(verdict);
}
|
|
||||||
|
|
||||||
/// A `tel:` link counts as having a protocol.
#[test]
fn passing_tel() {
    let verdict = utils::url_has_protocol("tel:5551234567");

    assert!(verdict);
}
|
|
||||||
|
|
||||||
/// An `ftp:` URL written without `//` still counts as having a protocol.
#[test]
fn passing_ftp_no_slashes() {
    let verdict = utils::url_has_protocol("ftp:some-ftp-server.com");

    assert!(verdict);
}
|
|
||||||
|
|
||||||
/// An `ftp://` URL carrying `user:password@` credentials counts as having a protocol.
#[test]
fn passing_ftp_with_credentials() {
    let verdict = utils::url_has_protocol("ftp://user:password@some-ftp-server.com");

    assert!(verdict);
}
|
|
||||||
|
|
||||||
/// A `javascript:` pseudo-URL counts as having a protocol.
#[test]
fn passing_javascript() {
    let verdict = utils::url_has_protocol("javascript:void(0)");

    assert!(verdict);
}
|
|
||||||
|
|
||||||
/// An `http://` URL counts as having a protocol.
#[test]
fn passing_http() {
    let verdict = utils::url_has_protocol("http://news.ycombinator.com");

    assert!(verdict);
}
|
|
||||||
|
|
||||||
/// An `https://` URL counts as having a protocol.
#[test]
fn passing_https() {
    let verdict = utils::url_has_protocol("https://github.com");

    assert!(verdict);
}
|
|
||||||
|
|
||||||
/// An upper-case scheme (`MAILTO:`) counts as having a protocol too.
#[test]
fn passing_mailto_uppercase() {
    let verdict = utils::url_has_protocol("MAILTO:somebody@somewhere.com?subject=hello");

    assert!(verdict);
}
|
|
||||||
|
|
||||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
|
||||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
|
||||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
|
||||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
|
||||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
|
||||||
|
|
||||||
/// A protocol-relative URL (`//host/path`) does not count as having a protocol.
#[test]
fn failing_url_with_no_protocol() {
    let verdict = utils::url_has_protocol("//some-hostname.com/some-file.html");

    assert!(!verdict);
}
|
|
||||||
|
|
||||||
/// A host-and-path string without any scheme does not count as having a protocol.
#[test]
fn failing_relative_path() {
    let verdict = utils::url_has_protocol("some-hostname.com/some-file.html");

    assert!(!verdict);
}
|
|
||||||
|
|
||||||
/// A root-relative path does not count as having a protocol.
#[test]
fn failing_relative_to_root_path() {
    let verdict = utils::url_has_protocol("/some-file.html");

    assert!(!verdict);
}
|
|
||||||
|
|
||||||
/// The empty string does not count as having a protocol.
#[test]
fn failing_empty_string() {
    let verdict = utils::url_has_protocol("");

    assert!(!verdict);
}
|
|
82
src/url.rs
Normal file
82
src/url.rs
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
use base64;
|
||||||
|
use percent_encoding::percent_decode_str;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
use crate::utils::{detect_media_type, parse_content_type};
|
||||||
|
|
||||||
|
/// A small PNG image encoded as a base64 `data:` URL.
///
/// NOTE(review): judging by the name this is a placeholder for images that are
/// left out of the output; confirm against the call sites.
///
/// The literal is split over two lines with a `\` continuation, so the value
/// contains no line break or indentation.
pub const EMPTY_IMAGE_DATA_URL: &str = "data:image/png;base64,\
    iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII=";
|
||||||
|
|
||||||
|
/// Returns `url` with its fragment (the part after `#`) removed.
///
/// The URL is taken by value, so it is modified directly and handed back;
/// no copy is made.
pub fn clean_url(mut url: Url) -> Url {
    // Clear fragment (if any)
    url.set_fragment(None);

    url
}
|
||||||
|
|
||||||
|
pub fn create_data_url(media_type: &str, charset: &str, data: &[u8], final_asset_url: &Url) -> Url {
|
||||||
|
// TODO: move this block out of this function
|
||||||
|
let media_type: String = if media_type.is_empty() {
|
||||||
|
detect_media_type(data, &final_asset_url)
|
||||||
|
} else {
|
||||||
|
media_type.to_string()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut data_url: Url = Url::parse("data:,").unwrap();
|
||||||
|
|
||||||
|
let c: String =
|
||||||
|
if !charset.trim().is_empty() && !charset.trim().eq_ignore_ascii_case("US-ASCII") {
|
||||||
|
format!(";charset={}", charset.trim())
|
||||||
|
} else {
|
||||||
|
"".to_string()
|
||||||
|
};
|
||||||
|
|
||||||
|
data_url.set_path(format!("{}{};base64,{}", media_type, c, base64::encode(data)).as_str());
|
||||||
|
|
||||||
|
data_url
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_url_and_has_protocol(input: &str) -> bool {
|
||||||
|
match Url::parse(&input) {
|
||||||
|
Ok(parsed_url) => {
|
||||||
|
return parsed_url.scheme().len() > 0;
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_data_url(url: &Url) -> (String, String, Vec<u8>) {
|
||||||
|
let path: String = url.path().to_string();
|
||||||
|
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
||||||
|
|
||||||
|
// Split data URL into meta data and raw data
|
||||||
|
let content_type: String = path.chars().take(comma_loc).collect();
|
||||||
|
let data: String = path.chars().skip(comma_loc + 1).collect();
|
||||||
|
|
||||||
|
// Parse meta data
|
||||||
|
let (media_type, charset, is_base64) = parse_content_type(&content_type);
|
||||||
|
|
||||||
|
// Parse raw data into vector of bytes
|
||||||
|
let text: String = percent_decode_str(&data).decode_utf8_lossy().to_string();
|
||||||
|
let blob: Vec<u8> = if is_base64 {
|
||||||
|
base64::decode(&text).unwrap_or(vec![])
|
||||||
|
} else {
|
||||||
|
text.as_bytes().to_vec()
|
||||||
|
};
|
||||||
|
|
||||||
|
(media_type, charset, blob)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn resolve_url(from: &Url, to: &str) -> Url {
|
||||||
|
match Url::parse(&to) {
|
||||||
|
Ok(parsed_url) => parsed_url,
|
||||||
|
Err(_) => match from.join(to) {
|
||||||
|
Ok(joined) => joined,
|
||||||
|
Err(_) => Url::parse("data:,").unwrap(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
535
src/utils.rs
535
src/utils.rs
@ -1,11 +1,15 @@
|
|||||||
use base64;
|
|
||||||
use reqwest::blocking::Client;
|
use reqwest::blocking::Client;
|
||||||
use reqwest::header::CONTENT_TYPE;
|
use reqwest::header::CONTENT_TYPE;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::path::Path;
|
use std::path::{Path, PathBuf};
|
||||||
use url::{form_urlencoded, ParseError, Url};
|
use url::Url;
|
||||||
|
|
||||||
|
use crate::opts::Options;
|
||||||
|
use crate::url::{clean_url, parse_data_url};
|
||||||
|
|
||||||
|
const ANSI_COLOR_RED: &'static str = "\x1b[31m";
|
||||||
|
const ANSI_COLOR_RESET: &'static str = "\x1b[0m";
|
||||||
const MAGIC: [[&[u8]; 2]; 18] = [
|
const MAGIC: [[&[u8]; 2]; 18] = [
|
||||||
// Image
|
// Image
|
||||||
[b"GIF87a", b"image/gif"],
|
[b"GIF87a", b"image/gif"],
|
||||||
@ -29,315 +33,366 @@ const MAGIC: [[&[u8]; 2]; 18] = [
|
|||||||
[b"....moov", b"video/quicktime"],
|
[b"....moov", b"video/quicktime"],
|
||||||
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
||||||
];
|
];
|
||||||
|
|
||||||
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
|
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
|
||||||
|
"application/javascript",
|
||||||
|
"application/json",
|
||||||
"image/svg+xml",
|
"image/svg+xml",
|
||||||
"text/css",
|
|
||||||
"text/html",
|
|
||||||
"text/javascript",
|
|
||||||
"text/plain",
|
|
||||||
];
|
];
|
||||||
|
|
||||||
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
|
pub fn detect_media_type(data: &[u8], url: &Url) -> String {
|
||||||
let media_type: String = if media_type.is_empty() {
|
// At first attempt to read file's header
|
||||||
detect_media_type(data, &url)
|
for magic_item in MAGIC.iter() {
|
||||||
} else {
|
if data.starts_with(magic_item[0]) {
|
||||||
media_type.to_string()
|
return String::from_utf8(magic_item[1].to_vec()).unwrap();
|
||||||
};
|
}
|
||||||
let hash: String = if fragment != "" {
|
}
|
||||||
format!("#{}", fragment)
|
|
||||||
} else {
|
// If header didn't match any known magic signatures,
|
||||||
str!()
|
// try to guess media type from file name
|
||||||
|
let parts: Vec<&str> = url.path().split('/').collect();
|
||||||
|
detect_media_type_by_file_name(parts.last().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn detect_media_type_by_file_name(filename: &str) -> String {
|
||||||
|
let filename_lowercased: &str = &filename.to_lowercase();
|
||||||
|
let parts: Vec<&str> = filename_lowercased.split('.').collect();
|
||||||
|
|
||||||
|
let mime: &str = match parts.last() {
|
||||||
|
Some(v) => match *v {
|
||||||
|
"avi" => "video/avi",
|
||||||
|
"bmp" => "image/bmp",
|
||||||
|
"css" => "text/css",
|
||||||
|
"flac" => "audio/flac",
|
||||||
|
"gif" => "image/gif",
|
||||||
|
"htm" | "html" => "text/html",
|
||||||
|
"ico" => "image/x-icon",
|
||||||
|
"jpeg" | "jpg" => "image/jpeg",
|
||||||
|
"js" => "application/javascript",
|
||||||
|
"json" => "application/json",
|
||||||
|
"mp3" => "audio/mpeg",
|
||||||
|
"mp4" | "m4v" => "video/mp4",
|
||||||
|
"ogg" => "audio/ogg",
|
||||||
|
"ogv" => "video/ogg",
|
||||||
|
"pdf" => "application/pdf",
|
||||||
|
"png" => "image/png",
|
||||||
|
"svg" => "image/svg+xml",
|
||||||
|
"swf" => "application/x-shockwave-flash",
|
||||||
|
"tif" | "tiff" => "image/tiff",
|
||||||
|
"txt" => "text/plain",
|
||||||
|
"wav" => "audio/wav",
|
||||||
|
"webp" => "image/webp",
|
||||||
|
"woff" => "font/woff",
|
||||||
|
"woff2" => "font/woff2",
|
||||||
|
"xml" => "text/xml",
|
||||||
|
&_ => "",
|
||||||
|
},
|
||||||
|
None => "",
|
||||||
};
|
};
|
||||||
|
|
||||||
format!(
|
mime.to_string()
|
||||||
"data:{};base64,{}{}",
|
|
||||||
media_type,
|
|
||||||
base64::encode(data),
|
|
||||||
hash
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn detect_media_type(data: &[u8], url: &str) -> String {
|
pub fn domain_is_within_domain(domain: &str, domain_to_match_against: &str) -> bool {
|
||||||
for item in MAGIC.iter() {
|
if domain_to_match_against.len() == 0 {
|
||||||
if data.starts_with(item[0]) {
|
return false;
|
||||||
return String::from_utf8(item[1].to_vec()).unwrap();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if url.to_lowercase().ends_with(".svg") {
|
if domain_to_match_against == "." {
|
||||||
return str!("image/svg+xml");
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
str!()
|
let domain_partials: Vec<&str> = domain.trim_end_matches(".").rsplit(".").collect();
|
||||||
}
|
let domain_to_match_against_partials: Vec<&str> = domain_to_match_against
|
||||||
|
.trim_end_matches(".")
|
||||||
|
.rsplit(".")
|
||||||
|
.collect();
|
||||||
|
let domain_to_match_against_starts_with_a_dot = domain_to_match_against.starts_with(".");
|
||||||
|
|
||||||
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
|
let mut i: usize = 0;
|
||||||
Url::parse(url.as_ref())
|
let l: usize = std::cmp::max(
|
||||||
.and_then(|u| Ok(u.scheme().len() > 0))
|
domain_partials.len(),
|
||||||
.unwrap_or(false)
|
domain_to_match_against_partials.len(),
|
||||||
}
|
);
|
||||||
|
let mut ok: bool = true;
|
||||||
|
|
||||||
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
|
while i < l {
|
||||||
Url::parse(url.as_ref())
|
// Exit and return false if we went out of bounds of the domain to match against, and it didn't start with a dot
|
||||||
.and_then(|u| Ok(u.scheme() == "data"))
|
if !domain_to_match_against_starts_with_a_dot
|
||||||
.unwrap_or(false)
|
&& domain_to_match_against_partials.len() < i + 1
|
||||||
}
|
{
|
||||||
|
ok = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
|
let domain_partial = if domain_partials.len() < i + 1 {
|
||||||
Url::parse(url.as_ref())
|
""
|
||||||
.and_then(|u| Ok(u.scheme() == "file"))
|
|
||||||
.unwrap_or(false)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
|
|
||||||
Url::parse(url.as_ref())
|
|
||||||
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
|
|
||||||
.unwrap_or(false)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
|
||||||
PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
|
|
||||||
let result = if is_http_url(to.as_ref()) {
|
|
||||||
to.as_ref().to_string()
|
|
||||||
} else {
|
} else {
|
||||||
Url::parse(from.as_ref())?
|
domain_partials.get(i).unwrap()
|
||||||
.join(to.as_ref())?
|
|
||||||
.as_ref()
|
|
||||||
.to_string()
|
|
||||||
};
|
};
|
||||||
Ok(result)
|
let domain_to_match_against_partial = if domain_to_match_against_partials.len() < i + 1 {
|
||||||
}
|
""
|
||||||
|
|
||||||
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
|
|
||||||
if Url::parse(url.as_ref()).unwrap().fragment() == None {
|
|
||||||
str!()
|
|
||||||
} else {
|
} else {
|
||||||
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
|
domain_to_match_against_partials.get(i).unwrap()
|
||||||
|
};
|
||||||
|
|
||||||
|
let parts_match = domain_to_match_against_partial.eq_ignore_ascii_case(domain_partial);
|
||||||
|
|
||||||
|
if !parts_match && domain_to_match_against_partial.len() != 0 {
|
||||||
|
ok = false;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ok
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
|
pub fn indent(level: u32) -> String {
|
||||||
let mut url = Url::parse(input.as_ref()).unwrap();
|
let mut result: String = String::new();
|
||||||
|
let mut l: u32 = level;
|
||||||
|
|
||||||
// Clear fragment
|
while l > 0 {
|
||||||
url.set_fragment(None);
|
result += " ";
|
||||||
|
l -= 1;
|
||||||
// Get rid of stray question mark
|
|
||||||
if url.query() == Some("") {
|
|
||||||
url.set_query(None);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove empty trailing ampersand(s)
|
|
||||||
let mut result: String = url.to_string();
|
|
||||||
while result.ends_with("&") {
|
|
||||||
result.pop();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn data_url_to_text<T: AsRef<str>>(url: T) -> (String, String) {
|
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
||||||
let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
|
media_type.to_lowercase().as_str().starts_with("text/")
|
||||||
let path: String = parsed_url.path().to_string();
|
|| PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
||||||
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
}
|
||||||
|
|
||||||
let meta_data: String = path.chars().take(comma_loc).collect();
|
pub fn parse_content_type(content_type: &str) -> (String, String, bool) {
|
||||||
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
let mut media_type: String = "text/plain".to_string();
|
||||||
|
let mut charset: String = "US-ASCII".to_string();
|
||||||
let data: String = decode_url(raw_data);
|
let mut is_base64: bool = false;
|
||||||
|
|
||||||
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
|
||||||
let mut encoding: &str = "";
|
|
||||||
|
|
||||||
let mut media_type: String = str!();
|
|
||||||
let mut text: String = str!();
|
|
||||||
|
|
||||||
|
// Parse meta data
|
||||||
|
let content_type_items: Vec<&str> = content_type.split(';').collect();
|
||||||
let mut i: i8 = 0;
|
let mut i: i8 = 0;
|
||||||
for item in &meta_data_items {
|
for item in &content_type_items {
|
||||||
if i == 0 {
|
if i == 0 {
|
||||||
media_type = str!(item);
|
if item.trim().len() > 0 {
|
||||||
|
media_type = item.trim().to_string();
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if item.eq_ignore_ascii_case("base64")
|
if item.trim().eq_ignore_ascii_case("base64") {
|
||||||
|| item.eq_ignore_ascii_case("utf8")
|
is_base64 = true;
|
||||||
|| item.eq_ignore_ascii_case("charset=UTF-8")
|
} else if item.trim().starts_with("charset=") {
|
||||||
{
|
charset = item.trim().chars().skip(8).collect();
|
||||||
encoding = item;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
i = i + 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_plaintext_media_type(&media_type) || media_type.is_empty() {
|
(media_type, charset, is_base64)
|
||||||
if encoding.eq_ignore_ascii_case("base64") {
|
|
||||||
text = String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
|
||||||
} else {
|
|
||||||
text = data
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
(media_type, text)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn decode_url(input: String) -> String {
|
|
||||||
let input: String = input.replace("+", "%2B");
|
|
||||||
|
|
||||||
form_urlencoded::parse(input.as_bytes())
|
|
||||||
.map(|(key, val)| {
|
|
||||||
[
|
|
||||||
key.to_string(),
|
|
||||||
if val.to_string().len() == 0 {
|
|
||||||
str!()
|
|
||||||
} else {
|
|
||||||
str!('=')
|
|
||||||
},
|
|
||||||
val.to_string(),
|
|
||||||
]
|
|
||||||
.concat()
|
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn file_url_to_fs_path(url: &str) -> String {
|
|
||||||
if !is_file_url(url) {
|
|
||||||
return str!();
|
|
||||||
}
|
|
||||||
|
|
||||||
let cutoff_l = if cfg!(windows) { 8 } else { 7 };
|
|
||||||
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
|
|
||||||
let url_fragment = get_url_fragment(url);
|
|
||||||
if url_fragment != "" {
|
|
||||||
let max_len = fs_file_path.len() - 1 - url_fragment.len();
|
|
||||||
fs_file_path = fs_file_path[0..max_len].to_string();
|
|
||||||
}
|
|
||||||
|
|
||||||
if cfg!(windows) {
|
|
||||||
fs_file_path = fs_file_path.replace("/", "\\");
|
|
||||||
}
|
|
||||||
|
|
||||||
// File paths should not be %-encoded
|
|
||||||
decode_url(fs_file_path)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn retrieve_asset(
|
pub fn retrieve_asset(
|
||||||
cache: &mut HashMap<String, Vec<u8>>,
|
cache: &mut HashMap<String, Vec<u8>>,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
parent_url: &str,
|
parent_url: &Url,
|
||||||
url: &str,
|
url: &Url,
|
||||||
as_data_url: bool,
|
options: &Options,
|
||||||
media_type: &str,
|
depth: u32,
|
||||||
opt_silent: bool,
|
) -> Result<(Vec<u8>, Url, String, String), reqwest::Error> {
|
||||||
) -> Result<(String, String), reqwest::Error> {
|
if url.scheme() == "data" {
|
||||||
if url.len() == 0 {
|
let (media_type, charset, data) = parse_data_url(url);
|
||||||
return Ok((str!(), str!()));
|
Ok((data, url.clone(), media_type, charset))
|
||||||
}
|
} else if url.scheme() == "file" {
|
||||||
|
// Check if parent_url is also a file: URL (if not, then we don't embed the asset)
|
||||||
if is_data_url(&url) {
|
if parent_url.scheme() != "file" {
|
||||||
if as_data_url {
|
if !options.silent {
|
||||||
Ok((url.to_string(), url.to_string()))
|
eprintln!(
|
||||||
|
"{}{}{} ({}){}",
|
||||||
|
indent(depth).as_str(),
|
||||||
|
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||||
|
&url,
|
||||||
|
"Security Error",
|
||||||
|
if options.no_color {
|
||||||
|
""
|
||||||
} else {
|
} else {
|
||||||
let (_media_type, text) = data_url_to_text(url);
|
ANSI_COLOR_RESET
|
||||||
|
},
|
||||||
Ok((text, url.to_string()))
|
|
||||||
}
|
|
||||||
} else if is_file_url(&url) {
|
|
||||||
// Check if parent_url is also file:///
|
|
||||||
// (if not, then we don't embed the asset)
|
|
||||||
if !is_file_url(&parent_url) {
|
|
||||||
return Ok((str!(), str!()));
|
|
||||||
}
|
|
||||||
|
|
||||||
let fs_file_path: String = file_url_to_fs_path(url);
|
|
||||||
let path = Path::new(&fs_file_path);
|
|
||||||
let url_fragment = get_url_fragment(url);
|
|
||||||
if path.exists() {
|
|
||||||
if !opt_silent {
|
|
||||||
eprintln!("{}", &url);
|
|
||||||
}
|
|
||||||
|
|
||||||
if as_data_url {
|
|
||||||
let data_url: String = data_to_data_url(
|
|
||||||
&media_type,
|
|
||||||
&fs::read(&fs_file_path).unwrap(),
|
|
||||||
&fs_file_path,
|
|
||||||
&url_fragment,
|
|
||||||
);
|
);
|
||||||
Ok((data_url, url.to_string()))
|
}
|
||||||
|
// Provoke an error: a request to an empty URL is expected to fail, and `?` propagates the resulting reqwest::Error
|
||||||
|
client.get("").send()?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let path_buf: PathBuf = url.to_file_path().unwrap().clone();
|
||||||
|
let path: &Path = path_buf.as_path();
|
||||||
|
if path.exists() {
|
||||||
|
if path.is_dir() {
|
||||||
|
if !options.silent {
|
||||||
|
eprintln!(
|
||||||
|
"{}{}{} (is a directory){}",
|
||||||
|
indent(depth).as_str(),
|
||||||
|
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||||
|
&url,
|
||||||
|
if options.no_color {
|
||||||
|
""
|
||||||
} else {
|
} else {
|
||||||
let data: String = fs::read_to_string(&fs_file_path).expect(url);
|
ANSI_COLOR_RESET
|
||||||
Ok((data, url.to_string()))
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Provoke an error: a request to an empty URL is expected to fail, which yields a reqwest::Error to return
|
||||||
|
Err(client.get("").send().unwrap_err())
|
||||||
|
} else {
|
||||||
|
if !options.silent {
|
||||||
|
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||||
|
}
|
||||||
|
|
||||||
|
let file_blob: Vec<u8> = fs::read(&path).expect("Unable to read file");
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
file_blob.clone(),
|
||||||
|
url.clone(),
|
||||||
|
detect_media_type(&file_blob, url),
|
||||||
|
"".to_string(),
|
||||||
|
))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Ok((str!(), url.to_string()))
|
if !options.silent {
|
||||||
|
eprintln!(
|
||||||
|
"{}{}{} (not found){}",
|
||||||
|
indent(depth).as_str(),
|
||||||
|
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||||
|
&url,
|
||||||
|
if options.no_color {
|
||||||
|
""
|
||||||
|
} else {
|
||||||
|
ANSI_COLOR_RESET
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Provoke an error: a request to an empty URL is expected to fail, which yields a reqwest::Error to return
|
||||||
|
Err(client.get("").send().unwrap_err())
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let cache_key: String = clean_url(&url);
|
let cache_key: String = clean_url(url.clone()).as_str().to_string();
|
||||||
|
|
||||||
if cache.contains_key(&cache_key) {
|
if cache.contains_key(&cache_key) {
|
||||||
// URL is in cache, we retrieve it
|
// URL is in cache, we get and return it
|
||||||
let data = cache.get(&cache_key).unwrap();
|
if !options.silent {
|
||||||
|
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
|
||||||
if !opt_silent {
|
|
||||||
eprintln!("{} (from cache)", &url);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if as_data_url {
|
|
||||||
let url_fragment = get_url_fragment(url);
|
|
||||||
Ok((
|
Ok((
|
||||||
data_to_data_url(media_type, data, url, &url_fragment),
|
cache.get(&cache_key).unwrap().to_vec(),
|
||||||
url.to_string(),
|
url.clone(),
|
||||||
|
"".to_string(),
|
||||||
|
"".to_string(),
|
||||||
))
|
))
|
||||||
} else {
|
} else {
|
||||||
Ok((String::from_utf8_lossy(data).to_string(), url.to_string()))
|
if let Some(domains) = &options.domains {
|
||||||
|
let domain_matches = domains
|
||||||
|
.iter()
|
||||||
|
.any(|d| domain_is_within_domain(url.host_str().unwrap(), &d.trim()));
|
||||||
|
if (options.exclude_domains && domain_matches)
|
||||||
|
|| (!options.exclude_domains && !domain_matches)
|
||||||
|
{
|
||||||
|
return Err(client.get("").send().unwrap_err());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// URL not in cache, we retrieve the file
|
||||||
|
match client.get(url.as_str()).send() {
|
||||||
|
Ok(response) => {
|
||||||
|
if !options.ignore_errors && response.status() != reqwest::StatusCode::OK {
|
||||||
|
if !options.silent {
|
||||||
|
eprintln!(
|
||||||
|
"{}{}{} ({}){}",
|
||||||
|
indent(depth).as_str(),
|
||||||
|
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||||
|
&url,
|
||||||
|
response.status(),
|
||||||
|
if options.no_color {
|
||||||
|
""
|
||||||
} else {
|
} else {
|
||||||
// URL not in cache, we request it
|
ANSI_COLOR_RESET
|
||||||
let mut response = client.get(url).send()?;
|
},
|
||||||
let res_url = response.url().to_string();
|
);
|
||||||
|
}
|
||||||
|
// Provoke an error: a request to an empty URL is expected to fail, which yields a reqwest::Error to return
|
||||||
|
return Err(client.get("").send().unwrap_err());
|
||||||
|
}
|
||||||
|
|
||||||
if !opt_silent {
|
let response_url: Url = response.url().clone();
|
||||||
if url == res_url {
|
|
||||||
eprintln!("{}", &url);
|
if !options.silent {
|
||||||
|
if url.as_str() == response_url.as_str() {
|
||||||
|
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||||
} else {
|
} else {
|
||||||
eprintln!("{} -> {}", &url, &res_url);
|
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &response_url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let new_cache_key: String = clean_url(&res_url);
|
let new_cache_key: String = clean_url(response_url.clone()).to_string();
|
||||||
|
|
||||||
if as_data_url {
|
// Attempt to obtain media type and charset by reading Content-Type header
|
||||||
// Convert response into a byte array
|
let content_type: &str = response
|
||||||
let mut data: Vec<u8> = vec![];
|
|
||||||
response.copy_to(&mut data)?;
|
|
||||||
|
|
||||||
// Attempt to obtain media type by reading the Content-Type header
|
|
||||||
let media_type = if media_type == "" {
|
|
||||||
response
|
|
||||||
.headers()
|
.headers()
|
||||||
.get(CONTENT_TYPE)
|
.get(CONTENT_TYPE)
|
||||||
.and_then(|header| header.to_str().ok())
|
.and_then(|header| header.to_str().ok())
|
||||||
.unwrap_or(&media_type)
|
.unwrap_or("");
|
||||||
|
|
||||||
|
let (media_type, charset, _is_base64) = parse_content_type(&content_type);
|
||||||
|
|
||||||
|
// Convert response into a byte array
|
||||||
|
let mut data: Vec<u8> = vec![];
|
||||||
|
match response.bytes() {
|
||||||
|
Ok(b) => {
|
||||||
|
data = b.to_vec();
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
if !options.silent {
|
||||||
|
eprintln!(
|
||||||
|
"{}{}{}{}",
|
||||||
|
indent(depth).as_str(),
|
||||||
|
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||||
|
error,
|
||||||
|
if options.no_color {
|
||||||
|
""
|
||||||
} else {
|
} else {
|
||||||
media_type
|
ANSI_COLOR_RESET
|
||||||
};
|
},
|
||||||
let url_fragment = get_url_fragment(url);
|
);
|
||||||
let data_url = data_to_data_url(&media_type, &data, url, &url_fragment);
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Add to cache
|
// Add retrieved resource to cache
|
||||||
cache.insert(new_cache_key, data);
|
cache.insert(new_cache_key, data.clone());
|
||||||
|
|
||||||
Ok((data_url, res_url))
|
// Return
|
||||||
|
Ok((data, response_url, media_type, charset))
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
if !options.silent {
|
||||||
|
eprintln!(
|
||||||
|
"{}{}{} ({}){}",
|
||||||
|
indent(depth).as_str(),
|
||||||
|
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||||
|
&url,
|
||||||
|
error,
|
||||||
|
if options.no_color {
|
||||||
|
""
|
||||||
} else {
|
} else {
|
||||||
let content = response.text().unwrap();
|
ANSI_COLOR_RESET
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Add to cache
|
Err(client.get("").send().unwrap_err())
|
||||||
cache.insert(new_cache_key, content.as_bytes().to_vec());
|
}
|
||||||
|
|
||||||
Ok((content, res_url))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
11
tests/_data_/css/index.html
Normal file
11
tests/_data_/css/index.html
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
<style>
|
||||||
|
|
||||||
|
@charset 'UTF-8';
|
||||||
|
|
||||||
|
@import 'style.css';
|
||||||
|
|
||||||
|
@import url(style.css);
|
||||||
|
|
||||||
|
@import url('style.css');
|
||||||
|
|
||||||
|
</style>
|
1
tests/_data_/css/style.css
Normal file
1
tests/_data_/css/style.css
Normal file
@ -0,0 +1 @@
|
|||||||
|
body{background-color:#000;color:#fff}
|
23
tests/_data_/import-css-via-data-url/index.html
Normal file
23
tests/_data_/import-css-via-data-url/index.html
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
<!doctype html>
|
||||||
|
|
||||||
|
<html lang="en">
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||||
|
<title>Attempt to import CSS via data URL asset</title>
|
||||||
|
<style>
|
||||||
|
|
||||||
|
body {
|
||||||
|
background-color: white;
|
||||||
|
color: black;
|
||||||
|
}
|
||||||
|
|
||||||
|
</style>
|
||||||
|
<link href="data:text/css;base64,QGltcG9ydCAic3R5bGUuY3NzIjsK" rel="stylesheet" type="text/css" />
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<p>If you see pink background with white foreground then we’re in trouble</p>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
4
tests/_data_/import-css-via-data-url/style.css
Normal file
4
tests/_data_/import-css-via-data-url/style.css
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
body {
|
||||||
|
background-color: pink;
|
||||||
|
color: white;
|
||||||
|
}
|
17
tests/_data_/integrity/index.html
Normal file
17
tests/_data_/integrity/index.html
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
<!doctype html>
|
||||||
|
|
||||||
|
<html lang="en">
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<title>Local HTML file</title>
|
||||||
|
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-IWaCTORHkRhOWzcZeILSVmV6V6gPTHgNem6o6rsFAyaKTieDFkeeMrWjtO0DuWrX3bqZY46CVTZXUu0mia0qXQ==" crossorigin="anonymous" />
|
||||||
|
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-vWBzl4NE9oIg8NFOPAyOZbaam0UXWr6aDHPaY2kodSzAFl+mKoj/RMNc6C31NDqK4mE2i68IWxYWqWJPLCgPOw==" crossorigin="anonymous" />
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<p>This page should have black background and white foreground, but only when served via http: (not via file:)</p>
|
||||||
|
<script src="script.js" integrity="sha256-ecrEsYh3+ICCX8BCrNSotXgI5534282JwJjx8Q9ZWLc="></script>
|
||||||
|
<script src="script.js" integrity="sha256-6idk9dK0bOkVdG7Oz4/0YLXSJya8xZHqbRZKMhYrt6o="></script>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
3
tests/_data_/integrity/script.js
Normal file
3
tests/_data_/integrity/script.js
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
function noop() {
|
||||||
|
console.log("monolith");
|
||||||
|
}
|
4
tests/_data_/integrity/style.css
Normal file
4
tests/_data_/integrity/style.css
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
body {
|
||||||
|
background-color: #000;
|
||||||
|
color: #FFF;
|
||||||
|
}
|
5
tests/_data_/noscript/image.svg
Normal file
5
tests/_data_/noscript/image.svg
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
<svg version="1.1" baseProfile="full" width="300" height="200" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="100%" height="100%" fill="red" />
|
||||||
|
<circle cx="150" cy="100" r="80" fill="green" />
|
||||||
|
<text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 296 B |
1
tests/_data_/noscript/index.html
Normal file
1
tests/_data_/noscript/index.html
Normal file
@ -0,0 +1 @@
|
|||||||
|
<body><noscript><img src="image.svg" /></noscript></body>
|
1
tests/_data_/noscript/nested.html
Normal file
1
tests/_data_/noscript/nested.html
Normal file
@ -0,0 +1 @@
|
|||||||
|
<body><noscript><h1>JS is not active</h1><noscript><img src="image.svg" /></noscript></noscript></body>
|
1
tests/_data_/noscript/script.html
Normal file
1
tests/_data_/noscript/script.html
Normal file
@ -0,0 +1 @@
|
|||||||
|
<body><noscript><script>alert(1);</script><img src="image.svg" /></noscript></body>
|
5
tests/_data_/svg/image.svg
Normal file
5
tests/_data_/svg/image.svg
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
<svg version="1.1" baseProfile="full" width="300" height="200" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="100%" height="100%" fill="red" />
|
||||||
|
<circle cx="150" cy="100" r="80" fill="green" />
|
||||||
|
<text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 296 B |
1
tests/_data_/svg/index.html
Normal file
1
tests/_data_/svg/index.html
Normal file
@ -0,0 +1 @@
|
|||||||
|
<div style="background-image: url('image.svg')"></div>
|
9
tests/_data_/unusual_encodings/gb2312.html
Normal file
9
tests/_data_/unusual_encodings/gb2312.html
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="content-type" content="text/html;charset=GB2312"/>
|
||||||
|
<title>近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网 </title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>近七成人减少线下需求 银行数字化转型提速</h1>
|
||||||
|
</body>
|
||||||
|
</html>
|
8
tests/_data_/unusual_encodings/iso-8859-1.html
Normal file
8
tests/_data_/unusual_encodings/iso-8859-1.html
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
© Some Company
|
||||||
|
</body>
|
||||||
|
</html>
|
115
tests/cli/base_url.rs
Normal file
115
tests/cli/base_url.rs
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use assert_cmd::prelude::*;
|
||||||
|
use std::env;
|
||||||
|
use std::process::Command;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn add_new_when_provided() {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
|
.arg("-b")
|
||||||
|
.arg("http://localhost:8000/")
|
||||||
|
.arg("data:text/html,Hello%2C%20World!")
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
|
// STDOUT should contain newly added base URL
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
"<html><head>\
|
||||||
|
<base href=\"http://localhost:8000/\"></base>\
|
||||||
|
</head><body>Hello, World!</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keep_existing_when_none_provided() {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
|
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
|
// STDOUT should contain the existing base URL, left unchanged
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
"<html><head>\
|
||||||
|
<base href=\"http://localhost:8000/\">\
|
||||||
|
</head><body>Hello, World!</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn override_existing_when_provided() {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
|
.arg("-b")
|
||||||
|
.arg("http://localhost/")
|
||||||
|
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
|
// STDOUT should contain the base URL overridden with the provided one
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
"<html><head>\
|
||||||
|
<base href=\"http://localhost/\">\
|
||||||
|
</head><body>Hello, World!</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn set_existing_to_empty_when_empty_provided() {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
|
.arg("-b")
|
||||||
|
.arg("")
|
||||||
|
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
|
// STDOUT should contain the base element with its href set to an empty string
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
"<html><head>\
|
||||||
|
<base href=\"\">\
|
||||||
|
</head><body>Hello, World!</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
}
|
||||||
|
}
|
144
tests/cli/basic.rs
Normal file
144
tests/cli/basic.rs
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use assert_cmd::prelude::*;
|
||||||
|
use std::env;
|
||||||
|
use std::fs;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::process::{Command, Stdio};
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn print_help_information() {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
let out = cmd.arg("-h").output().unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
|
// STDOUT should contain program name, version, and usage information
|
||||||
|
// TODO: assert on the contents of STDOUT (currently not verified)
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn print_version() {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
let out = cmd.arg("-V").output().unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
|
// STDOUT should contain program name and version
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn stdin_target_input() {
|
||||||
|
let mut echo = Command::new("echo")
|
||||||
|
.arg("Hello from STDIN")
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.spawn()
|
||||||
|
.unwrap();
|
||||||
|
let echo_out = echo.stdout.take().unwrap();
|
||||||
|
echo.wait().unwrap();
|
||||||
|
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
cmd.stdin(echo_out);
|
||||||
|
let out = cmd.arg("-M").arg("-").output().unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
|
// STDOUT should contain HTML created out of STDIN
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
"<html><head></head><body>Hello from STDIN\n</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn css_import_string() {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
let path_html: &Path = Path::new("tests/_data_/css/index.html");
|
||||||
|
let path_css: &Path = Path::new("tests/_data_/css/style.css");
|
||||||
|
|
||||||
|
assert!(path_html.is_file());
|
||||||
|
assert!(path_css.is_file());
|
||||||
|
|
||||||
|
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
|
||||||
|
|
||||||
|
// STDERR should list files that got retrieved
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stderr),
|
||||||
|
format!(
|
||||||
|
"\
|
||||||
|
{file_url_html}\n \
|
||||||
|
{file_url_css}\n \
|
||||||
|
{file_url_css}\n \
|
||||||
|
{file_url_css}\n\
|
||||||
|
",
|
||||||
|
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||||
|
file_url_css = Url::from_file_path(fs::canonicalize(&path_css).unwrap()).unwrap(),
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
// STDOUT should contain embedded CSS url()'s
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
"<html><head><style>\n\n @charset \"UTF-8\";\n\n @import \"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\";\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n</style>\n</head><body></body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||||
|
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod failing {
|
||||||
|
use assert_cmd::prelude::*;
|
||||||
|
use std::env;
|
||||||
|
use std::process::Command;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bad_input_empty_target() {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
let out = cmd.arg("").output().unwrap();
|
||||||
|
|
||||||
|
// STDERR should contain error description
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stderr),
|
||||||
|
"No target specified\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// STDOUT should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stdout), "");
|
||||||
|
|
||||||
|
// Exit code should be 1
|
||||||
|
out.assert().code(1);
|
||||||
|
}
|
||||||
|
}
|
233
tests/cli/data_url.rs
Normal file
233
tests/cli/data_url.rs
Normal file
@ -0,0 +1,233 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use assert_cmd::prelude::*;
|
||||||
|
use std::env;
|
||||||
|
use std::process::Command;
|
||||||
|
|
||||||
|
use monolith::url::EMPTY_IMAGE_DATA_URL;
|
||||||
|
|
||||||
|
/// `-I` on a data URL target adds the isolating Content-Security-Policy meta tag.
#[test]
fn isolate_data_url() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let output = monolith
        .args(&["-M", "-I", "data:text/html,Hello%2C%20World!"])
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // Nothing is reported on STDERR
    assert_eq!(stderr, "");

    // STDOUT carries the isolated HTML document
    assert_eq!(
        stdout,
        concat!(
            "<html><head>",
            "<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:;\"></meta>",
            "</head><body>Hello, World!</body></html>\n",
        )
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// `-c` on a data URL target empties the STYLE element and adds a `style-src 'none'` policy.
#[test]
fn remove_css_from_data_url() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let output = monolith
        .args(&[
            "-M",
            "-c",
            "data:text/html,<style>body{background-color:pink}</style>Hello",
        ])
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // Nothing is reported on STDERR
    assert_eq!(stderr, "");

    // STDOUT carries the document with its CSS stripped
    assert_eq!(
        stdout,
        concat!(
            "<html><head>",
            "<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>",
            "<style></style>",
            "</head><body>Hello</body></html>\n",
        )
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// `-F` on a data URL target drops the `@font-face` rule and adds a `font-src 'none'` policy.
#[test]
fn remove_fonts_from_data_url() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let output = monolith
        .args(&[
            "-M",
            "-F",
            "data:text/html,<style>@font-face { font-family: myFont; src: url(font.woff); }</style>Hi",
        ])
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // Nothing is reported on STDERR
    assert_eq!(stderr, "");

    // STDOUT carries the document without web fonts
    assert_eq!(
        stdout,
        concat!(
            "<html><head>",
            "<meta http-equiv=\"Content-Security-Policy\" content=\"font-src 'none';\"></meta>",
            "<style></style>",
            "</head><body>Hi</body></html>\n",
        )
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// `-f` on a data URL target blanks the IFRAME source and adds frame-blocking policies.
#[test]
fn remove_frames_from_data_url() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let output = monolith
        .args(&[
            "-M",
            "-f",
            "data:text/html,<iframe src=\"https://duckduckgo.com\"></iframe>Hi",
        ])
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // Nothing is reported on STDERR
    assert_eq!(stderr, "");

    // STDOUT carries the document with the iframe's source emptied
    assert_eq!(
        stdout,
        concat!(
            "<html><head>",
            "<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>",
            "</head><body><iframe src=\"\"></iframe>Hi</body></html>\n",
        )
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// `-i` on a data URL target swaps the image source for the empty-image data URL.
#[test]
fn remove_images_from_data_url() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let output = monolith
        .args(&[
            "-M",
            "-i",
            "data:text/html,<img src=\"https://google.com\"/>Hi",
        ])
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // Nothing is reported on STDERR
    assert_eq!(stderr, "");

    // STDOUT carries the document with the image replaced by the placeholder
    let expected_stdout = format!(
        "<html><head>{csp}</head><body><img src=\"{img}\">Hi</body></html>\n",
        csp = "<meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>",
        img = EMPTY_IMAGE_DATA_URL,
    );
    assert_eq!(stdout, expected_stdout);

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// `-j` on a data URL target empties the SCRIPT element and adds a `script-src 'none'` policy.
#[test]
fn remove_js_from_data_url() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let output = monolith
        .args(&["-M", "-j", "data:text/html,<script>alert(2)</script>Hi"])
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // Nothing is reported on STDERR
    assert_eq!(stderr, "");

    // STDOUT carries the document with its JavaScript stripped
    assert_eq!(
        stdout,
        concat!(
            "<html>",
            "<head>",
            "<meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>",
            "<script></script></head>",
            "<body>Hi</body>",
            "</html>\n",
        )
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||||
|
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod failing {
|
||||||
|
use assert_cmd::prelude::*;
|
||||||
|
use std::env;
|
||||||
|
use std::process::Command;
|
||||||
|
|
||||||
|
/// A data URL without an HTML media type is rejected: error on STDERR, empty STDOUT, exit code 1.
#[test]
fn bad_input_data_url() {
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("data:,Hello%2C%20World!")
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // The error description is reported on STDERR
    assert_eq!(stderr, "Unsupported document media type\n");

    // Nothing is written to STDOUT
    assert_eq!(stdout, "");

    // The process exits with code 1
    output.assert().code(1);
}
|
||||||
|
|
||||||
|
/// A data URL target that references a relative local script must not get that script embedded.
#[test]
fn security_disallow_local_assets_within_data_url_targets() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let output = monolith
        .args(&[
            "-M",
            "data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E",
        ])
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // Nothing is reported on STDERR
    assert_eq!(stderr, "");

    // STDOUT carries the document with an empty script payload
    assert_eq!(
        stdout,
        "<html><head><script src=\"data:application/javascript;base64,\"></script></head><body></body></html>\n"
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
}
|
271
tests/cli/local_files.rs
Normal file
271
tests/cli/local_files.rs
Normal file
@ -0,0 +1,271 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use assert_cmd::prelude::*;
|
||||||
|
use std::env;
|
||||||
|
use std::fs;
|
||||||
|
use std::path::{Path, MAIN_SEPARATOR};
|
||||||
|
use std::process::Command;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
use monolith::url::EMPTY_IMAGE_DATA_URL;
|
||||||
|
|
||||||
|
/// A relative filesystem path is accepted as the target and its local assets get embedded.
#[test]
fn local_file_target_input_relative_target_path() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let cwd_normalized: String = env::current_dir()
        .unwrap()
        .to_str()
        .unwrap()
        .replace("\\", "/");
    let target_path = format!(
        "tests{0}_data_{0}basic{0}local-file.html",
        MAIN_SEPARATOR
    );
    let output = monolith.arg("-M").arg(target_path).output().unwrap();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // STDERR lists every file URL that was requested; two of them are missing
    let base = format!("{}{}/tests/_data_/basic", file_url_protocol, cwd_normalized);
    let expected_stderr = format!(
        "{b}/local-file.html\n {b}/local-style.css\n {b}/local-style-does-not-exist.css (not found)\n {b}/monolith.png (not found)\n {b}/local-script.js\n",
        b = base
    );
    assert_eq!(stderr, expected_stderr);

    // STDOUT carries the HTML of the local file with its assets inlined
    assert_eq!(
        stdout,
        concat!(
            "<!DOCTYPE html><html lang=\"en\"><head>\n ",
            "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n ",
            "<title>Local HTML file</title>\n ",
            "<link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n ",
            "<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n ",
            "<img alt=\"\">\n ",
            "<a href=\"file://local-file.html/\">Tricky href</a>\n ",
            "<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n ",
            "<script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n",
            "</body></html>\n",
        )
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// A local HTML file processed with `-Ijci` is isolated and stripped of JS, CSS and images.
#[test]
fn local_file_target_input_absolute_target_path() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let path_html: &Path = Path::new("tests/_data_/basic/local-file.html");

    let output = monolith
        .args(&["-M", "-Ijci"])
        .arg(path_html.as_os_str())
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // STDERR lists only the target file itself
    let file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap();
    assert_eq!(stderr, format!("{}\n", file_url_html));

    // STDOUT carries the HTML of the local file with assets removed
    let expected_stdout = format!(
        "{head}<img src=\"{img}\" alt=\"\">\n {tail}",
        head = concat!(
            "<!DOCTYPE html><html lang=\"en\"><head>",
            "<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n ",
            "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n ",
            "<title>Local HTML file</title>\n ",
            "<link rel=\"stylesheet\" type=\"text/css\">\n ",
            "<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n ",
        ),
        img = EMPTY_IMAGE_DATA_URL,
        tail = concat!(
            "<a href=\"file://local-file.html/\">Tricky href</a>\n ",
            "<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n ",
            "<script></script>\n\n\n\n",
            "</body></html>\n",
        ),
    );
    assert_eq!(stdout, expected_stdout);

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// A `file:` URL is accepted as the target; `-cji` strips CSS, JS and images from the result.
#[test]
fn local_file_url_target_input() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let cwd_normalized: String = env::current_dir()
        .unwrap()
        .to_str()
        .unwrap()
        .replace("\\", "/");
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let target_url = format!(
        "{}{}/tests/_data_/basic/local-file.html",
        file_url_protocol, cwd_normalized
    );
    let output = monolith
        .args(&["-M", "-cji"])
        .arg(&target_url)
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // STDERR lists the single retrieved file URL
    assert_eq!(stderr, format!("{}\n", target_url));

    // STDOUT carries the HTML of the local file with assets removed
    let expected_stdout = format!(
        "{head}<img src=\"{img}\" alt=\"\">\n {tail}",
        head = concat!(
            "<!DOCTYPE html><html lang=\"en\"><head>",
            "<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n ",
            "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n ",
            "<title>Local HTML file</title>\n ",
            "<link rel=\"stylesheet\" type=\"text/css\">\n ",
            "<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n ",
        ),
        img = EMPTY_IMAGE_DATA_URL,
        tail = concat!(
            "<a href=\"file://local-file.html/\">Tricky href</a>\n ",
            "<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n ",
            "<script></script>\n\n\n\n",
            "</body></html>\n",
        ),
    );
    assert_eq!(stdout, expected_stdout);

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// A local image referenced via `url()` inside a `style` attribute is embedded as a data URL.
#[test]
fn embed_file_url_local_asset_within_style_attribute() {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let path_html: &Path = Path::new("tests/_data_/svg/index.html");
    let path_svg: &Path = Path::new("tests/_data_/svg/image.svg");

    let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();

    // STDERR should list files that got retrieved
    assert_eq!(
        String::from_utf8_lossy(&out.stderr),
        format!(
            "\
            {file_url_html}\n \
            {file_url_svg}\n\
            ",
            file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
            file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
        )
    );

    // STDOUT should contain HTML with data URL for background-image in it.
    // The quotes around the url() argument sit inside a double-quoted attribute value, so they
    // are written as `&quot;` entities here; a raw `"` would terminate this Rust string literal.
    // NOTE(review): assumes the serializer emits `&quot;` for these quotes -- confirm against
    // the binary's actual output.
    assert_eq!(
        String::from_utf8_lossy(&out.stdout),
        "<html><head></head><body><div style=\"background-image: url(&quot;data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=&quot;)\"></div>\n</body></html>\n"
    );

    // Exit code should be 0
    out.assert().code(0);
}
|
||||||
|
|
||||||
|
/// `integrity` attributes are dropped from the saved document when assets come from local files.
#[test]
fn discard_integrity_for_local_files() {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let cwd_normalized: String = env::current_dir()
        .unwrap()
        .to_str()
        .unwrap()
        .replace("\\", "/");
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
    // The platform difference is already captured by `file_url_protocol`, so the target URL is
    // built the same way on every OS (the former per-OS branches were identical).
    let target_url = format!(
        "{file}{cwd}/tests/_data_/integrity/index.html",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );
    let out = cmd.arg("-M").arg("-i").arg(target_url).output().unwrap();

    // STDERR should contain list of retrieved file URLs
    assert_eq!(
        String::from_utf8_lossy(&out.stderr),
        format!(
            "\
            {file}{cwd}/tests/_data_/integrity/index.html\n \
            {file}{cwd}/tests/_data_/integrity/style.css\n \
            {file}{cwd}/tests/_data_/integrity/style.css\n \
            {file}{cwd}/tests/_data_/integrity/script.js\n \
            {file}{cwd}/tests/_data_/integrity/script.js\n\
            ",
            file = file_url_protocol,
            cwd = cwd_normalized,
        )
    );

    // STDOUT should contain HTML from the local file; integrity attributes should be missing.
    // The expected text has no placeholders, so it is a plain literal rather than `format!`.
    assert_eq!(
        String::from_utf8_lossy(&out.stdout),
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\n \
        <title>Local HTML file</title>\n \
        <link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNGRkY7Cn0K\" rel=\"stylesheet\" type=\"text/css\" crossorigin=\"anonymous\">\n \
        <link href=\"style.css\" rel=\"stylesheet\" type=\"text/css\" crossorigin=\"anonymous\">\n</head>\n\n<body>\n \
        <p>This page should have black background and white foreground, but only when served via http: (not via file:)</p>\n \
        <script src=\"data:application/javascript;base64,ZnVuY3Rpb24gbm9vcCgpIHsKICAgIGNvbnNvbGUubG9nKCJtb25vbGl0aCIpOwp9Cg==\"></script>\n \
        <script src=\"script.js\"></script>\n\n\n\n\
        </body></html>\n\
        "
    );

    // Exit code should be 0
    out.assert().code(0);
}
|
||||||
|
}
|
6
tests/cli/mod.rs
Normal file
6
tests/cli/mod.rs
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
mod base_url;
|
||||||
|
mod basic;
|
||||||
|
mod data_url;
|
||||||
|
mod local_files;
|
||||||
|
mod noscript;
|
||||||
|
mod unusual_encodings;
|
170
tests/cli/noscript.rs
Normal file
170
tests/cli/noscript.rs
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use assert_cmd::prelude::*;
|
||||||
|
use std::env;
|
||||||
|
use std::fs;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::process::Command;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
/// Without `-n`, the NOSCRIPT element is kept and the image inside it is embedded as a data URL.
#[test]
fn parse_noscript_contents() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let path_html: &Path = Path::new("tests/_data_/noscript/index.html");
    let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");

    let output = monolith
        .arg("-M")
        .arg(path_html.as_os_str())
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // STDERR lists the target HTML file followed by the embedded SVG file
    let file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap();
    let file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap();
    assert_eq!(stderr, format!("{}\n {}\n", file_url_html, file_url_svg));

    // STDOUT carries the document with the NOSCRIPT element intact and its image inlined
    assert_eq!(
        stdout,
        concat!(
            "<html><head></head><body><noscript><img src=\"",
            "data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=",
            "\"></noscript>\n</body></html>\n",
        )
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// With `-n`, the NOSCRIPT tags are turned into comments and their contents are exposed.
#[test]
fn unwrap_noscript_contents() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let path_html: &Path = Path::new("tests/_data_/noscript/index.html");
    let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");

    let output = monolith
        .arg("-Mn")
        .arg(path_html.as_os_str())
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // STDERR lists the target HTML file followed by the embedded SVG file
    let file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap();
    let file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap();
    assert_eq!(stderr, format!("{}\n {}\n", file_url_html, file_url_svg));

    // STDOUT carries the document with the NOSCRIPT contents unwrapped
    assert_eq!(
        stdout,
        concat!(
            "<html><head></head><body><!--noscript--><img src=\"",
            "data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=",
            "\"><!--/noscript-->\n</body></html>\n",
        )
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// With `-n`, nested NOSCRIPT elements are each unwrapped into comment-delimited contents.
#[test]
fn unwrap_noscript_contents_nested() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let path_html: &Path = Path::new("tests/_data_/noscript/nested.html");
    let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");

    let output = monolith
        .arg("-Mn")
        .arg(path_html.as_os_str())
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // STDERR lists the target HTML file followed by the embedded SVG file
    let file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap();
    let file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap();
    assert_eq!(stderr, format!("{}\n {}\n", file_url_html, file_url_svg));

    // STDOUT carries the document with both NOSCRIPT levels unwrapped
    assert_eq!(
        stdout,
        concat!(
            "<html><head></head><body><!--noscript--><h1>JS is not active</h1><!--noscript--><img src=\"",
            "data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=",
            "\"><!--/noscript--><!--/noscript-->\n</body></html>\n",
        )
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// With `-n`, NOSCRIPT contents from the `script.html` fixture are unwrapped into the BODY.
#[test]
fn unwrap_noscript_contents_with_script() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let path_html: &Path = Path::new("tests/_data_/noscript/script.html");
    let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");

    let output = monolith
        .arg("-Mn")
        .arg(path_html.as_os_str())
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // STDERR lists the target HTML file followed by the embedded SVG file
    let file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap();
    let file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap();
    assert_eq!(stderr, format!("{}\n {}\n", file_url_html, file_url_svg));

    // STDOUT carries the document with the NOSCRIPT contents unwrapped
    assert_eq!(
        stdout,
        concat!(
            "<html>",
            "<head></head>",
            "<body>",
            "<!--noscript-->",
            "<img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\">",
            "<!--/noscript-->\n",
            "</body>",
            "</html>\n",
        )
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// With `-n`, a NOSCRIPT element carrying attributes keeps them inside the opening comment.
#[test]
fn unwrap_noscript_contents_attr_data_url() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let output = monolith
        .args(&[
            "-M",
            "-n",
            "data:text/html,<noscript class=\"\">test</noscript>",
        ])
        .output()
        .unwrap();
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    // Nothing is reported on STDERR
    assert_eq!(stderr, "");

    // STDOUT carries the unwrapped contents of the NOSCRIPT element
    assert_eq!(
        stdout,
        "<html><head><!--noscript class=\"\"-->test<!--/noscript--></head><body></body></html>\n"
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
}
|
239
tests/cli/unusual_encodings.rs
Normal file
239
tests/cli/unusual_encodings.rs
Normal file
@ -0,0 +1,239 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use assert_cmd::prelude::*;
|
||||||
|
use encoding_rs::Encoding;
|
||||||
|
use std::env;
|
||||||
|
use std::path::MAIN_SEPARATOR;
|
||||||
|
use std::process::{Command, Stdio};
|
||||||
|
|
||||||
|
/// A GB2312-encoded local document is saved in GB2312 with its text intact.
#[test]
fn properly_save_document_with_gb2312() {
    let cwd = env::current_dir().unwrap();
    let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let target_path = format!(
        "tests{0}_data_{0}unusual_encodings{0}gb2312.html",
        MAIN_SEPARATOR
    );
    let output = monolith.arg("-M").arg(target_path).output().unwrap();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDERR lists only the target file
    assert_eq!(
        String::from_utf8_lossy(&output.stderr),
        format!(
            "{}{}/tests/_data_/unusual_encodings/gb2312.html\n",
            file_url_protocol, cwd_normalized
        )
    );

    // STDOUT carries the original document without any modifications;
    // decode it as GB2312 when that label is known, otherwise fall back to lossy UTF-8
    let decoded: String = match Encoding::for_label(b"gb2312") {
        Some(encoding) => encoding.decode(&output.stdout).0.into_owned(),
        None => String::from_utf8_lossy(&output.stdout).into_owned(),
    };
    assert_eq!(
        decoded,
        concat!(
            "<html>",
            "<head>\n ",
            "<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\">\n ",
            "<title>近七成人减少线下需求\u{3000}银行数字化转型提速--经济·科技--人民网 </title>\n",
            "</head>\n",
            "<body>\n ",
            "<h1>近七成人减少线下需求\u{3000}银行数字化转型提速</h1>\n\n\n",
            "</body>",
            "</html>\n",
        )
    );

    // The process exits with code 0
    output.assert().code(0);
}
|
||||||
|
|
||||||
|
/// A GB2312-encoded document piped in through STDIN (target `-`) is saved with its text intact.
///
/// The fixture is streamed by a helper `cat` process whose STDOUT becomes the STDIN of the
/// binary under test.
#[test]
fn properly_save_document_with_gb2312_from_stdin() {
    let mut cat = Command::new("cat")
        .arg(format!(
            "tests{s}_data_{s}unusual_encodings{s}gb2312.html",
            s = MAIN_SEPARATOR
        ))
        .stdout(Stdio::piped())
        .spawn()
        .unwrap();
    let cat_stdout = cat.stdout.take().unwrap();

    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    cmd.stdin(cat_stdout);
    let out = cmd.arg("-M").arg("-").output().unwrap();

    // Reap the helper only after the consumer has run: waiting on it while nothing reads the
    // pipe would block forever as soon as the fixture no longer fits in the OS pipe buffer.
    cat.wait().unwrap();

    // STDERR should be empty
    assert_eq!(String::from_utf8_lossy(&out.stderr), "");

    // STDOUT should contain HTML created out of STDIN;
    // decode it as GB2312 when that label is known, otherwise fall back to lossy UTF-8
    let s: String = match Encoding::for_label(b"gb2312") {
        Some(encoding) => encoding.decode(&out.stdout).0.into_owned(),
        None => String::from_utf8_lossy(&out.stdout).into_owned(),
    };
    assert_eq!(
        s,
        "<html>\
        <head>\n \
        <meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\">\n \
        <title>近七成人减少线下需求\u{3000}银行数字化转型提速--经济·科技--人民网 </title>\n\
        </head>\n\
        <body>\n \
        <h1>近七成人减少线下需求\u{3000}银行数字化转型提速</h1>\n\n\n\
        </body>\
        </html>\n"
    );

    // Exit code should be 0
    out.assert().code(0);
}
|
||||||
|
|
||||||
|
#[test]
fn properly_save_document_with_gb2312_custom_charset() {
    // Working directory with forward slashes, as it appears in the file URL
    let cwd_normalized: String = env::current_dir()
        .unwrap()
        .to_str()
        .unwrap()
        .replace("\\", "/");
    let target = format!(
        "tests{s}_data_{s}unusual_encodings{s}gb2312.html",
        s = MAIN_SEPARATOR
    );

    // Save the GB2312 fixture while requesting `utf8` through `-C`
    let out = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("-M")
        .arg("-C")
        .arg("utf8")
        .arg(target)
        .output()
        .unwrap();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDERR should contain only the target file
    let expected_stderr = format!(
        "{file}{cwd}/tests/_data_/unusual_encodings/gb2312.html\n",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );
    assert_eq!(String::from_utf8_lossy(&out.stderr), expected_stderr);

    // STDOUT should contain the document with `charset=utf8` in its meta tag
    let stdout: String = String::from_utf8_lossy(&out.stdout).into_owned();
    assert_eq!(
        stdout,
        "<html>\
            <head>\n \
                <meta http-equiv=\"content-type\" content=\"text/html;charset=utf8\">\n \
                <title>近七成人减少线下需求\u{3000}银行数字化转型提速--经济·科技--人民网 </title>\n\
            </head>\n\
            <body>\n \
                <h1>近七成人减少线下需求\u{3000}银行数字化转型提速</h1>\n\n\n\
            </body>\
        </html>\n"
    );

    // Exit code should be 0
    out.assert().code(0);
}
|
||||||
|
|
||||||
|
#[test]
fn properly_save_document_with_gb2312_custom_charset_bad() {
    let target = format!(
        "tests{s}_data_{s}unusual_encodings{s}gb2312.html",
        s = MAIN_SEPARATOR
    );

    // `utf0` is not a known encoding label, so the run is expected to be rejected
    let out = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("-M")
        .arg("-C")
        .arg("utf0")
        .arg(target)
        .output()
        .unwrap();

    // STDERR should contain error message
    let stderr = String::from_utf8_lossy(&out.stderr);
    assert_eq!(stderr, "Unknown encoding: utf0\n");

    // STDOUT should be empty
    let stdout: String = String::from_utf8_lossy(&out.stdout).into_owned();
    assert_eq!(stdout, "");

    // Exit code should be 1
    out.assert().code(1);
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||||
|
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod failing {
|
||||||
|
use assert_cmd::prelude::*;
|
||||||
|
use std::env;
|
||||||
|
use std::path::MAIN_SEPARATOR;
|
||||||
|
use std::process::Command;
|
||||||
|
|
||||||
|
#[test]
fn change_iso88591_to_utf8_to_properly_display_html_entities() {
    let cwd = env::current_dir().unwrap();
    let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let out = cmd
        .arg("-M")
        .arg(format!(
            "tests{s}_data_{s}unusual_encodings{s}iso-8859-1.html",
            s = MAIN_SEPARATOR
        ))
        .output()
        .unwrap();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDERR should contain only the target file
    assert_eq!(
        String::from_utf8_lossy(&out.stderr),
        format!(
            "{file}{cwd}/tests/_data_/unusual_encodings/iso-8859-1.html\n",
            file = file_url_protocol,
            cwd = cwd_normalized,
        )
    );

    // STDOUT should contain original document but with UTF-8 charset
    // NOTE(review): the expectation below still declares iso-8859-1, which is
    // presumably why this test lives in the `failing` module; confirm.
    // `from_utf8_lossy` turns bytes that are not valid UTF-8 into U+FFFD, so
    // the expected text spells that character as an escape.
    assert_eq!(
        String::from_utf8_lossy(&out.stdout),
        "<html>\
            <head>\n \
                <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n \
            </head>\n \
            <body>\n \
                \u{fffd} Some Company\n \
            \n\n</body>\
        </html>\n"
    );

    // Exit code should be 0
    out.assert().code(0);
}
|
||||||
|
}
|
371
tests/css/embed_css.rs
Normal file
371
tests/css/embed_css.rs
Normal file
@ -0,0 +1,371 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use reqwest::blocking::Client;
|
||||||
|
use reqwest::Url;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use monolith::css;
|
||||||
|
use monolith::opts::Options;
|
||||||
|
use monolith::url::EMPTY_IMAGE_DATA_URL;
|
||||||
|
|
||||||
|
#[test]
fn empty_input() {
    // An empty stylesheet must come back as an empty string
    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("data:,").unwrap();
    let options = Options::default();

    let embedded = css::embed_css(&mut cache, &client, &document_url, "", &options, 0);

    assert_eq!(embedded, "");
}
|
||||||
|
|
||||||
|
#[test]
fn trim_if_empty() {
    // Input made only of tabs and spaces is expected to collapse to nothing
    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
    let options = Options::default();

    let embedded = css::embed_css(&mut cache, &client, &document_url, "\t \t ", &options, 0);

    assert_eq!(embedded, "");
}
|
||||||
|
|
||||||
|
#[test]
fn style_exclude_unquoted_images() {
    const STYLE: &str = "/* border: none;*/\
        background-image: url(https://somewhere.com/bg.png); \
        list-style: url(/assets/images/bullet.svg);\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)";

    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    // With images excluded, both unquoted url() values are expected to be
    // swapped for the quoted empty-image data URL
    let expected = format!(
        "/* border: none;*/\
        background-image: url(\"{empty_image}\"); \
        list-style: url(\"{empty_image}\");\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)",
        empty_image = EMPTY_IMAGE_DATA_URL
    );

    assert_eq!(
        css::embed_css(&mut cache, &client, &document_url, STYLE, &options, 0),
        expected
    );
}
|
||||||
|
|
||||||
|
#[test]
fn style_exclude_single_quoted_images() {
    const STYLE: &str = "/* border: none;*/\
        background-image: url('https://somewhere.com/bg.png'); \
        list-style: url('/assets/images/bullet.svg');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)";

    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("data:,").unwrap();
    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    // With images excluded, both single-quoted url() values are expected to
    // be swapped for the double-quoted empty-image data URL
    let expected = format!(
        "/* border: none;*/\
        background-image: url(\"{empty_image}\"); \
        list-style: url(\"{empty_image}\");\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)",
        empty_image = EMPTY_IMAGE_DATA_URL
    );

    assert_eq!(
        css::embed_css(&mut cache, &client, &document_url, STYLE, &options, 0),
        expected
    );
}
|
||||||
|
|
||||||
|
#[test]
fn style_block() {
    // A rule whose only URL is already a data URL is expected to pass through unchanged
    const CSS: &str = "\
        #id.class-name:not(:nth-child(3n+0)) {\n \
        // border: none;\n \
        background-image: url(\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\");\n\
        }\n\
        \n\
        html > body {}";

    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("file:///").unwrap();
    let mut options = Options::default();
    options.silent = true;

    let embedded = css::embed_css(&mut cache, &client, &document_url, CSS, &options, 0);

    assert_eq!(embedded, CSS);
}
|
||||||
|
|
||||||
|
#[test]
fn attribute_selectors() {
    // Every flavour of attribute selector is expected to survive a round trip untouched
    const CSS: &str = "\
        [data-value] {\n\
        /* Attribute exists */\n\
        }\n\
        \n\
        [data-value=\"foo\"] {\n\
        /* Attribute has this exact value */\n\
        }\n\
        \n\
        [data-value*=\"foo\"] {\n\
        /* Attribute value contains this value somewhere in it */\n\
        }\n\
        \n\
        [data-value~=\"foo\"] {\n\
        /* Attribute has this value in a space-separated list somewhere */\n\
        }\n\
        \n\
        [data-value^=\"foo\"] {\n\
        /* Attribute value starts with this */\n\
        }\n\
        \n\
        [data-value|=\"foo\"] {\n\
        /* Attribute value starts with this in a dash-separated list */\n\
        }\n\
        \n\
        [data-value$=\"foo\"] {\n\
        /* Attribute value ends with this */\n\
        }\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
    let mut options = Options::default();
    options.silent = true;

    let embedded = css::embed_css(&mut cache, &client, &document_url, CSS, &options, 0);

    assert_eq!(embedded, CSS);
}
|
||||||
|
|
||||||
|
#[test]
fn import_string() {
    const CSS: &str = "\
        @charset 'UTF-8';\n\
        \n\
        @import 'data:text/css,html{background-color:%23000}';\n\
        \n\
        @import url('data:text/css,html{color:%23fff}')\n\
        ";
    // Expected form: double quotes everywhere and base64 data URLs for both imports
    const CSS_OUT: &str = "\
        @charset \"UTF-8\";\n\
        \n\
        @import \"data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9\";\n\
        \n\
        @import url(\"data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==\")\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
    let mut options = Options::default();
    options.silent = true;

    let embedded = css::embed_css(&mut cache, &client, &document_url, CSS, &options, 0);

    assert_eq!(embedded, CSS_OUT);
}
|
||||||
|
|
||||||
|
#[test]
fn hash_urls() {
    // url() values that are only a fragment (`#...`) are expected to be left as they are
    const CSS: &str = "\
        body {\n \
        behavior: url(#default#something);\n\
        }\n\
        \n\
        .scissorHalf {\n \
        offset-path: url(#somePath);\n\
        }\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
    let mut options = Options::default();
    options.silent = true;

    let embedded = css::embed_css(&mut cache, &client, &document_url, CSS, &options, 0);

    assert_eq!(embedded, CSS);
}
|
||||||
|
|
||||||
|
#[test]
fn transform_percentages_and_degrees() {
    // Signed and unsigned percentages/degrees are expected to be emitted verbatim
    const CSS: &str = "\
        div {\n \
        transform: translate(-50%, -50%) rotate(-45deg);\n\
        transform: translate(50%, 50%) rotate(45deg);\n\
        transform: translate(+50%, +50%) rotate(+45deg);\n\
        }\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
    let mut options = Options::default();
    options.silent = true;

    let embedded = css::embed_css(&mut cache, &client, &document_url, CSS, &options, 0);

    assert_eq!(embedded, CSS);
}
|
||||||
|
|
||||||
|
#[test]
fn unusual_indents() {
    // Selectors built from backslash-escaped punctuation are expected to round-trip unchanged
    const CSS: &str = "\
        .is\\:good:hover {\n \
        color: green\n\
        }\n\
        \n\
        #\\~\\!\\@\\$\\%\\^\\&\\*\\(\\)\\+\\=\\,\\.\\/\\\\\\'\\\"\\;\\:\\?\\>\\<\\[\\]\\{\\}\\|\\`\\# {\n \
        color: black\n\
        }\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
    let mut options = Options::default();
    options.silent = true;

    let embedded = css::embed_css(&mut cache, &client, &document_url, CSS, &options, 0);

    assert_eq!(embedded, CSS);
}
|
||||||
|
|
||||||
|
#[test]
fn exclude_fonts() {
    const CSS: &str = "\
        @font-face {\n \
        font-family: 'My Font';\n \
        src: url(my_font.woff);\n\
        }\n\
        \n\
        #identifier {\n \
        font-family: 'My Font' Arial\n\
        }\n\
        \n\
        @font-face {\n \
        font-family: 'My Font';\n \
        src: url(my_font.woff);\n\
        }\n\
        \n\
        div {\n \
        font-family: 'My Font' Verdana\n\
        }\n\
        ";
    // Expected form: both @font-face rules gone, remaining font names double-quoted
    const CSS_OUT: &str = " \
        \n\
        \n\
        #identifier {\n \
        font-family: \"My Font\" Arial\n\
        }\n\
        \n \
        \n\
        \n\
        div {\n \
        font-family: \"My Font\" Verdana\n\
        }\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
    let mut options = Options::default();
    options.no_fonts = true;
    options.silent = true;

    let embedded = css::embed_css(&mut cache, &client, &document_url, CSS, &options, 0);

    assert_eq!(embedded, CSS_OUT);
}
|
||||||
|
|
||||||
|
#[test]
fn content() {
    const CSS: &str = "\
        #language a[href=\"#translations\"]:before {\n\
        content: url(data:,) \"\\A\";\n\
        white-space: pre }\n\
        ";
    // Expected form: the data URL quoted and base64-encoded, the `\A` escape written as `\a `
    const CSS_OUT: &str = "\
        #language a[href=\"#translations\"]:before {\n\
        content: url(\"data:text/plain;base64,\") \"\\a \";\n\
        white-space: pre }\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("data:,").unwrap();
    let mut options = Options::default();
    options.silent = true;

    let embedded = css::embed_css(&mut cache, &client, &document_url, CSS, &options, 0);

    assert_eq!(embedded, CSS_OUT);
}
|
||||||
|
|
||||||
|
#[test]
fn ie_css_hack() {
    const CSS: &str = "\
        div#p>svg>foreignObject>section:not(\\9) {\n\
        width: 300px;\n\
        width: 500px\\9;\n\
        }\n\
        ";
    // Expected form: `\9` kept inside the selector, but written as a tab after the value
    const CSS_OUT: &str = "\
        div#p>svg>foreignObject>section:not(\\9) {\n\
        width: 300px;\n\
        width: 500px\t;\n\
        }\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url: Url = Url::parse("data:,").unwrap();
    let mut options = Options::default();
    options.silent = true;

    let embedded = css::embed_css(&mut cache, &client, &document_url, CSS, &options, 0);

    assert_eq!(embedded, CSS_OUT);
}
|
||||||
|
}
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod passing {
|
mod passing {
|
||||||
use crate::css;
|
use monolith::css;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn backrgound() {
|
fn backrgound() {
|
||||||
@ -64,7 +64,7 @@ mod passing {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod failing {
|
mod failing {
|
||||||
use crate::css;
|
use monolith::css;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn empty() {
|
fn empty() {
|
@ -1,3 +1,2 @@
|
|||||||
mod embed_css;
|
mod embed_css;
|
||||||
mod enquote;
|
|
||||||
mod is_image_url_prop;
|
mod is_image_url_prop;
|
@ -1,5 +1,3 @@
|
|||||||
use crate::utils;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
@ -7,22 +5,25 @@ use crate::utils;
|
|||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
#[test]
|
#[cfg(test)]
|
||||||
fn passing_encode_string_with_specific_media_type() {
|
mod passing {
|
||||||
let mime = "application/javascript";
|
use html5ever::serialize::{serialize, SerializeOpts};
|
||||||
let data = "var word = 'hello';\nalert(word);\n";
|
|
||||||
let data_url = utils::data_to_data_url(mime, data.as_bytes(), "", "");
|
use monolith::html;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn basic() {
|
||||||
|
let html = "<div>text</div>";
|
||||||
|
let mut dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
|
||||||
|
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&data_url,
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
|
"<html><head><link rel=\"icon\" href=\"I_AM_A_FAVICON_DATA_URL\"></link></head><body><div>text</div></body></html>"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_encode_append_fragment() {
|
|
||||||
let data = "<svg></svg>\n";
|
|
||||||
let data_url = utils::data_to_data_url("text/css", data.as_bytes(), "", "fragment");
|
|
||||||
|
|
||||||
assert_eq!(&data_url, "data:text/css;base64,PHN2Zz48L3N2Zz4K#fragment");
|
|
||||||
}
|
}
|
@ -1,5 +1,3 @@
|
|||||||
use crate::utils;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
@ -7,32 +5,41 @@ use crate::utils;
|
|||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
#[test]
|
#[cfg(test)]
|
||||||
fn passing_unix_file_url() {
|
mod passing {
|
||||||
assert!(utils::is_file_url(
|
use monolith::html;
|
||||||
"file:///home/user/Websites/my-website/index.html"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_windows_file_url() {
|
fn empty_input_sha256() {
|
||||||
assert!(utils::is_file_url(
|
assert!(html::check_integrity(
|
||||||
"file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png"
|
"".as_bytes(),
|
||||||
|
"sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_unix_url_with_backslashes() {
|
fn sha256() {
|
||||||
assert!(utils::is_file_url(
|
assert!(html::check_integrity(
|
||||||
"file:\\\\\\home\\user\\Websites\\my-website\\index.html"
|
"abcdef0123456789".as_bytes(),
|
||||||
|
"sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM="
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_windows_file_url_with_backslashes() {
|
fn sha384() {
|
||||||
assert!(utils::is_file_url(
|
assert!(html::check_integrity(
|
||||||
"file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png"
|
"abcdef0123456789".as_bytes(),
|
||||||
|
"sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw"
|
||||||
));
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sha512() {
|
||||||
|
assert!(html::check_integrity(
|
||||||
|
"abcdef0123456789".as_bytes(),
|
||||||
|
"sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww=="
|
||||||
|
));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
@ -42,34 +49,41 @@ fn passing_windows_file_url_with_backslashes() {
|
|||||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
#[test]
|
#[cfg(test)]
|
||||||
fn failing_url_with_no_protocl() {
|
mod failing {
|
||||||
assert!(!utils::is_file_url("//kernel.org"));
|
use monolith::html;
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn failing_dot_slash_filename() {
|
fn empty_hash() {
|
||||||
assert!(!utils::is_file_url("./index.html"));
|
assert!(!html::check_integrity("abcdef0123456789".as_bytes(), ""));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn failing_just_filename() {
|
fn empty_input_empty_hash() {
|
||||||
assert!(!utils::is_file_url("some-local-page.htm"));
|
assert!(!html::check_integrity("".as_bytes(), ""));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn failing_https_ip_port_url() {
|
fn sha256() {
|
||||||
assert!(!utils::is_file_url("https://1.2.3.4:80/www/index.html"));
|
assert!(!html::check_integrity(
|
||||||
}
|
"abcdef0123456789".as_bytes(),
|
||||||
|
"sha256-badhash"
|
||||||
#[test]
|
|
||||||
fn failing_data_url() {
|
|
||||||
assert!(!utils::is_file_url(
|
|
||||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn failing_just_word_file() {
|
fn sha384() {
|
||||||
assert!(!utils::is_file_url("file"));
|
assert!(!html::check_integrity(
|
||||||
|
"abcdef0123456789".as_bytes(),
|
||||||
|
"sha384-badhash"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sha512() {
|
||||||
|
assert!(!html::check_integrity(
|
||||||
|
"abcdef0123456789".as_bytes(),
|
||||||
|
"sha512-badhash"
|
||||||
|
));
|
||||||
|
}
|
||||||
}
|
}
|
83
tests/html/compose_csp.rs
Normal file
83
tests/html/compose_csp.rs
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use monolith::html;
|
||||||
|
use monolith::opts::Options;
|
||||||
|
|
||||||
|
#[test]
fn isolated() {
    // Only the `isolate` switch is turned on
    let mut options = Options::default();
    options.isolate = true;

    assert_eq!(
        html::compose_csp(&options),
        "default-src 'unsafe-eval' 'unsafe-inline' data:;"
    );
}
|
||||||
|
|
||||||
|
#[test]
fn no_css() {
    // Only the `no_css` switch is turned on
    let mut options = Options::default();
    options.no_css = true;

    assert_eq!(html::compose_csp(&options), "style-src 'none';");
}
|
||||||
|
|
||||||
|
#[test]
fn no_fonts() {
    // Only the `no_fonts` switch is turned on
    let mut options = Options::default();
    options.no_fonts = true;

    assert_eq!(html::compose_csp(&options), "font-src 'none';");
}
|
||||||
|
|
||||||
|
#[test]
fn no_frames() {
    // Only the `no_frames` switch is turned on; two directives are expected
    let mut options = Options::default();
    options.no_frames = true;

    assert_eq!(
        html::compose_csp(&options),
        "frame-src 'none'; child-src 'none';"
    );
}
|
||||||
|
|
||||||
|
#[test]
fn no_js() {
    // Only the `no_js` switch is turned on
    let mut options = Options::default();
    options.no_js = true;

    assert_eq!(html::compose_csp(&options), "script-src 'none';");
}
|
||||||
|
|
||||||
|
#[test]
fn no_images() {
    // Only the `no_images` switch is turned on
    let mut options = Options::default();
    options.no_images = true;

    assert_eq!(html::compose_csp(&options), "img-src data:;");
}
|
||||||
|
|
||||||
|
#[test]
fn all() {
    // Every restriction switched on at once; one policy string is expected,
    // with its directives separated by single spaces
    const EXPECTED_CSP: &str = "default-src 'unsafe-eval' 'unsafe-inline' data:; \
        style-src 'none'; \
        font-src 'none'; \
        frame-src 'none'; \
        child-src 'none'; \
        script-src 'none'; \
        img-src data:;";

    let mut options = Options::default();
    options.isolate = true;
    options.no_css = true;
    options.no_fonts = true;
    options.no_frames = true;
    options.no_js = true;
    options.no_images = true;

    assert_eq!(html::compose_csp(&options), EXPECTED_CSP);
}
|
||||||
|
}
|
66
tests/html/create_metadata_tag.rs
Normal file
66
tests/html/create_metadata_tag.rs
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use chrono::prelude::*;
|
||||||
|
use reqwest::Url;
|
||||||
|
|
||||||
|
use monolith::html;
|
||||||
|
|
||||||
|
#[test]
fn http_url() {
    let url: Url = Url::parse("http://192.168.1.1/").unwrap();

    // The tag embeds a timestamp with one-second resolution. Sampling the
    // clock on both sides of the call keeps the test from failing when the
    // second rolls over between the sample and the call.
    let before = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
    let metadata_comment: String = html::create_metadata_tag(&url);
    let after = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);

    // Expected tag for a given timestamp; an HTTP source is named in the tag
    let expected_tag = |timestamp: &str| {
        format!(
            "<!-- Saved from {} at {} using {} v{} -->",
            &url,
            timestamp,
            env!("CARGO_PKG_NAME"),
            env!("CARGO_PKG_VERSION"),
        )
    };

    assert!(
        metadata_comment == expected_tag(&before) || metadata_comment == expected_tag(&after),
        "unexpected metadata tag: {}",
        metadata_comment
    );
}
|
||||||
|
|
||||||
|
#[test]
fn file_url() {
    let url: Url = Url::parse("file:///home/monolith/index.html").unwrap();

    // The tag embeds a timestamp with one-second resolution. Sampling the
    // clock on both sides of the call keeps the test from failing when the
    // second rolls over between the sample and the call.
    let before = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
    let metadata_comment: String = html::create_metadata_tag(&url);
    let after = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);

    // Expected tag for a given timestamp; a file URL is reported as "local source"
    let expected_tag = |timestamp: &str| {
        format!(
            "<!-- Saved from local source at {} using {} v{} -->",
            timestamp,
            env!("CARGO_PKG_NAME"),
            env!("CARGO_PKG_VERSION"),
        )
    };

    assert!(
        metadata_comment == expected_tag(&before) || metadata_comment == expected_tag(&after),
        "unexpected metadata tag: {}",
        metadata_comment
    );
}
|
||||||
|
|
||||||
|
#[test]
fn data_url() {
    let url: Url = Url::parse("data:text/html,Hello%2C%20World!").unwrap();

    // The tag embeds a timestamp with one-second resolution. Sampling the
    // clock on both sides of the call keeps the test from failing when the
    // second rolls over between the sample and the call.
    let before = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
    let metadata_comment: String = html::create_metadata_tag(&url);
    let after = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);

    // Expected tag for a given timestamp; a data URL is reported as "local source"
    let expected_tag = |timestamp: &str| {
        format!(
            "<!-- Saved from local source at {} using {} v{} -->",
            timestamp,
            env!("CARGO_PKG_NAME"),
            env!("CARGO_PKG_VERSION"),
        )
    };

    assert!(
        metadata_comment == expected_tag(&before) || metadata_comment == expected_tag(&after),
        "unexpected metadata tag: {}",
        metadata_comment
    );
}
|
||||||
|
}
|
156
tests/html/embed_srcset.rs
Normal file
156
tests/html/embed_srcset.rs
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use reqwest::blocking::Client;
|
||||||
|
use reqwest::Url;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use monolith::html;
|
||||||
|
use monolith::opts::Options;
|
||||||
|
use monolith::url::EMPTY_IMAGE_DATA_URL;
|
||||||
|
|
||||||
|
#[test]
fn small_medium_large() {
    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url = Url::parse("data:,").unwrap();
    let srcset_value = "small.png 1x, medium.png 1.5x, large.png 2x";
    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    let embedded_srcset = html::embed_srcset(
        &mut cache,
        &client,
        &document_url,
        &srcset_value,
        &options,
        0,
    );

    // With images excluded, every candidate is expected to become the
    // empty-image data URL while keeping its density descriptor
    let expected = format!(
        "{empty} 1x, {empty} 1.5x, {empty} 2x",
        empty = EMPTY_IMAGE_DATA_URL
    );
    assert_eq!(embedded_srcset, expected);
}
|
||||||
|
|
||||||
|
#[test]
fn small_medium_only_medium_has_scale() {
    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url = Url::parse("data:,").unwrap();
    let srcset_value = "small.png, medium.png 1.5x";
    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    let embedded_srcset = html::embed_srcset(
        &mut cache,
        &client,
        &document_url,
        &srcset_value,
        &options,
        0,
    );

    // A candidate without a descriptor is expected to stay without one
    let expected = format!("{empty}, {empty} 1.5x", empty = EMPTY_IMAGE_DATA_URL);
    assert_eq!(embedded_srcset, expected);
}
|
||||||
|
|
||||||
|
#[test]
fn commas_within_file_names() {
    let mut cache = HashMap::new();
    let client = Client::new();
    let document_url = Url::parse("data:,").unwrap();
    let srcset_value = "small,s.png 1x, large,l.png 2x";
    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    let embedded_srcset = html::embed_srcset(
        &mut cache,
        &client,
        &document_url,
        &srcset_value,
        &options,
        0,
    );

    // A comma inside a file name must not be taken for a candidate separator:
    // exactly two candidates are expected
    let expected = format!("{empty} 1x, {empty} 2x", empty = EMPTY_IMAGE_DATA_URL);
    assert_eq!(embedded_srcset, expected);
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tabs_and_newlines_after_commas() {
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
let client = Client::new();
|
||||||
|
let srcset_value = "small,s.png 1x,\nmedium,m.png 2x,\nlarge,l.png 3x";
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_images = true;
|
||||||
|
options.silent = true;
|
||||||
|
let embedded_css = html::embed_srcset(
|
||||||
|
cache,
|
||||||
|
&client,
|
||||||
|
&Url::parse("data:,").unwrap(),
|
||||||
|
&srcset_value,
|
||||||
|
&options,
|
||||||
|
0,
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
embedded_css,
|
||||||
|
format!(
|
||||||
|
"{} 1x, {} 2x, {} 3x",
|
||||||
|
EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||||
|
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
mod failing {
    use reqwest::blocking::Client;
    use reqwest::Url;
    use std::collections::HashMap;

    use monolith::html;
    use monolith::opts::Options;
    use monolith::url::EMPTY_IMAGE_DATA_URL;

    /// A srcset ending in a dangling comma: both candidates are expected to be
    /// replaced by the empty image data URL and the trailing comma is expected
    /// to be kept in the output.
    #[test]
    fn trailing_comma() {
        let mut cache = HashMap::new();
        let client = Client::new();

        // Image embedding is disabled and progress output is silenced.
        let mut options = Options::default();
        options.no_images = true;
        options.silent = true;

        let embedded_srcset = html::embed_srcset(
            &mut cache,
            &client,
            &Url::parse("data:,").unwrap(),
            "small.png 1x, large.png 2x,",
            &options,
            0,
        );

        assert_eq!(
            embedded_srcset,
            format!("{0} 1x, {0} 2x,", EMPTY_IMAGE_DATA_URL),
        );
    }
}
|
104
tests/html/get_base_url.rs
Normal file
104
tests/html/get_base_url.rs
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
mod passing {
    use monolith::html;

    /// Parses `markup` into a DOM and returns whatever `html::get_base_url`
    /// reports for that document.
    fn base_url_of(markup: &str) -> Option<String> {
        let dom = html::html_to_dom(&markup.as_bytes().to_vec(), "".to_string());
        html::get_base_url(&dom.document)
    }

    /// A single BASE tag with an href yields that href.
    #[test]
    fn present() {
        let markup = "<!doctype html>
<html>
<head>
<base href=\"https://musicbrainz.org\" />
</head>
<body>
</body>
</html>";

        assert_eq!(
            base_url_of(markup),
            Some("https://musicbrainz.org".to_string())
        );
    }

    /// With two BASE tags, the href of the first one is expected.
    #[test]
    fn multiple_tags() {
        let markup = "<!doctype html>
<html>
<head>
<base href=\"https://www.discogs.com/\" />
<base href=\"https://musicbrainz.org\" />
</head>
<body>
</body>
</html>";

        assert_eq!(
            base_url_of(markup),
            Some("https://www.discogs.com/".to_string())
        );
    }
}
|
||||||
|
|
||||||
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||||
|
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
mod failing {
    use monolith::html;

    /// Parses `markup` into a DOM and returns whatever `html::get_base_url`
    /// reports for that document.
    fn base_url_of(markup: &str) -> Option<String> {
        let dom = html::html_to_dom(&markup.as_bytes().to_vec(), "".to_string());
        html::get_base_url(&dom.document)
    }

    /// No BASE tag at all: nothing is reported.
    #[test]
    fn absent() {
        let markup = "<!doctype html>
<html>
<head>
</head>
<body>
</body>
</html>";

        assert_eq!(base_url_of(markup), None);
    }

    /// A BASE tag without an href attribute: nothing is reported.
    #[test]
    fn no_href() {
        let markup = "<!doctype html>
<html>
<head>
<base />
</head>
<body>
</body>
</html>";

        assert_eq!(base_url_of(markup), None);
    }

    /// A BASE tag whose href is empty: the empty string is reported.
    #[test]
    fn empty_href() {
        let markup = "<!doctype html>
<html>
<head>
<base href=\"\" />
</head>
<body>
</body>
</html>";

        assert_eq!(base_url_of(markup), Some("".to_string()));
    }
}
|
72
tests/html/get_charset.rs
Normal file
72
tests/html/get_charset.rs
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
mod passing {
    use monolith::html;

    /// Parses `markup` into a DOM and returns whatever `html::get_charset`
    /// reports for that document.
    fn charset_of(markup: &str) -> Option<String> {
        let dom = html::html_to_dom(&markup.as_bytes().to_vec(), "".to_string());
        html::get_charset(&dom.document)
    }

    /// Charset declared through a `http-equiv="content-type"` META tag.
    #[test]
    fn meta_content_type() {
        let markup = "<!doctype html>
<html>
<head>
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
</head>
<body>
</body>
</html>";

        assert_eq!(charset_of(markup), Some("GB2312".to_string()));
    }

    /// Charset declared through a `charset` META attribute.
    #[test]
    fn meta_charset() {
        let markup = "<!doctype html>
<html>
<head>
<meta charset=\"GB2312\" />
</head>
<body>
</body>
</html>";

        assert_eq!(charset_of(markup), Some("GB2312".to_string()));
    }

    /// Two conflicting declarations, `charset` attribute first: the first
    /// declaration is the one expected.
    #[test]
    fn multiple_conflicting_meta_charset_first() {
        let markup = "<!doctype html>
<html>
<head>
<meta charset=\"utf-8\" />
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
</head>
<body>
</body>
</html>";

        assert_eq!(charset_of(markup), Some("utf-8".to_string()));
    }

    /// Two conflicting declarations, content-type first: the first
    /// declaration is the one expected.
    #[test]
    fn multiple_conflicting_meta_content_type_first() {
        let markup = "<!doctype html>
<html>
<head>
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
<meta charset=\"utf-8\" />
</head>
<body>
</body>
</html>";

        assert_eq!(charset_of(markup), Some("GB2312".to_string()));
    }
}
|
54
tests/html/get_node_attr.rs
Normal file
54
tests/html/get_node_attr.rs
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
mod passing {
    use html5ever::rcdom::{Handle, NodeData};

    use monolith::html;

    /// A DIV carrying the `style` attribute twice (in different letter case):
    /// the value of the first occurrence is expected, and BODY is expected to
    /// have no `class` attribute. Six nodes are expected to be visited.
    #[test]
    fn div_two_style_attributes() {
        let markup = "<!doctype html><html><head></head><body><DIV STYLE=\"color: blue;\" style=\"display: none;\"></div></body></html>";
        let dom = html::html_to_dom(&markup.as_bytes().to_vec(), "".to_string());
        let mut total = 0;

        // Counts every node reached and checks attributes on BODY and DIV;
        // only document and element nodes have their children walked.
        fn visit(node: &Handle, visited: &mut i8) {
            *visited += 1;

            let descend = match &node.data {
                NodeData::Document => true,
                NodeData::Element { name, .. } => {
                    let tag = name.local.as_ref().to_string();

                    match tag.as_str() {
                        "body" => assert_eq!(html::get_node_attr(node, "class"), None),
                        "div" => assert_eq!(
                            html::get_node_attr(node, "style"),
                            Some("color: blue;".to_string())
                        ),
                        _ => (),
                    }

                    true
                }
                _ => false,
            };

            if descend {
                for child in node.children.borrow().iter() {
                    visit(child, visited);
                }
            }
        }

        visit(&dom.document, &mut total);

        assert_eq!(total, 6);
    }
}
|
53
tests/html/get_node_name.rs
Normal file
53
tests/html/get_node_name.rs
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
mod passing {
    use html5ever::rcdom::{Handle, NodeData};

    use monolith::html;

    /// Walks the parsed tree and checks, for HEAD, BODY, DIV and P, the name
    /// reported for each element's parent. Seven nodes are expected to be
    /// visited.
    #[test]
    fn parent_node_names() {
        let markup = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
        let dom = html::html_to_dom(&markup.as_bytes().to_vec(), "".to_string());
        let mut total = 0;

        // Counts every node reached; only document and element nodes have
        // their children walked.
        fn visit(node: &Handle, visited: &mut i8) {
            *visited += 1;

            let descend = match &node.data {
                NodeData::Document => true,
                NodeData::Element { name, .. } => {
                    let tag = name.local.as_ref().to_string();
                    let parent = html::get_parent_node(node);
                    let parent_name = html::get_node_name(&parent);

                    // Expected parent tag for the elements under test.
                    let expected_parent = match tag.as_str() {
                        "head" | "body" => Some("html"),
                        "div" => Some("body"),
                        "p" => Some("div"),
                        _ => None,
                    };
                    if let Some(expected) = expected_parent {
                        assert_eq!(parent_name, Some(expected));
                    }

                    true
                }
                _ => false,
            };

            if descend {
                for child in node.children.borrow().iter() {
                    visit(child, visited);
                }
            }
        }

        visit(&dom.document, &mut total);

        assert_eq!(total, 7);
    }
}
|
@ -1,5 +1,3 @@
|
|||||||
use crate::utils;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
@ -7,19 +5,27 @@ use crate::utils;
|
|||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
#[test]
|
#[cfg(test)]
|
||||||
fn passing_http_url() {
|
mod passing {
|
||||||
assert!(utils::is_http_url("http://kernel.org"));
|
use monolith::html;
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_https_url() {
|
fn icon() {
|
||||||
assert!(utils::is_http_url("https://www.rust-lang.org/"));
|
let html = "<link rel=\"icon\" href=\"\" /><div>text</div>";
|
||||||
}
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let res: bool = html::has_favicon(&dom.document);
|
||||||
|
|
||||||
#[test]
|
assert!(res);
|
||||||
fn passing_http_url_with_backslashes() {
|
}
|
||||||
assert!(utils::is_http_url("http:\\\\freebsd.org\\"));
|
|
||||||
|
#[test]
|
||||||
|
fn shortcut_icon() {
|
||||||
|
let html = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let res: bool = html::has_favicon(&dom.document);
|
||||||
|
|
||||||
|
assert!(res);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
@ -29,29 +35,16 @@ fn passing_http_url_with_backslashes() {
|
|||||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
#[test]
|
#[cfg(test)]
|
||||||
fn failing_url_with_no_protocol() {
|
mod failing {
|
||||||
assert!(!utils::is_http_url("//kernel.org"));
|
use monolith::html;
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn failing_dot_slash_filename() {
|
fn absent() {
|
||||||
assert!(!utils::is_http_url("./index.html"));
|
let html = "<div>text</div>";
|
||||||
}
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let res: bool = html::has_favicon(&dom.document);
|
||||||
|
|
||||||
#[test]
|
assert!(!res);
|
||||||
fn failing_just_filename() {
|
}
|
||||||
assert!(!utils::is_http_url("some-local-page.htm"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn failing_https_ip_port_url() {
|
|
||||||
assert!(!utils::is_http_url("ftp://1.2.3.4/www/index.html"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn failing_data_url() {
|
|
||||||
assert!(!utils::is_http_url(
|
|
||||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
|
||||||
));
|
|
||||||
}
|
}
|
@ -1,5 +1,3 @@
|
|||||||
use crate::js;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
@ -7,19 +5,24 @@ use crate::js;
|
|||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
#[test]
|
#[cfg(test)]
|
||||||
fn passing_onblur_camelcase() {
|
mod passing {
|
||||||
assert!(js::attr_is_event_handler("onBlur"));
|
use monolith::html;
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_onclick_lowercase() {
|
fn icon() {
|
||||||
assert!(js::attr_is_event_handler("onclick"));
|
assert!(html::is_icon("icon"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_onclick_camelcase() {
|
fn shortcut_icon_capitalized() {
|
||||||
assert!(js::attr_is_event_handler("onClick"));
|
assert!(html::is_icon("Shortcut Icon"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn icon_uppercase() {
|
||||||
|
assert!(html::is_icon("ICON"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
@ -29,17 +32,27 @@ fn passing_onclick_camelcase() {
|
|||||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
#[test]
|
#[cfg(test)]
|
||||||
fn failing_href() {
|
mod failing {
|
||||||
assert!(!js::attr_is_event_handler("href"));
|
use monolith::html;
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn failing_empty_string() {
|
fn mask_icon() {
|
||||||
assert!(!js::attr_is_event_handler(""));
|
assert!(!html::is_icon("mask-icon"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn failing_class() {
|
fn fluid_icon() {
|
||||||
assert!(!js::attr_is_event_handler("class"));
|
assert!(!html::is_icon("fluid-icon"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn stylesheet() {
|
||||||
|
assert!(!html::is_icon("stylesheet"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn empty_string() {
|
||||||
|
assert!(!html::is_icon(""));
|
||||||
|
}
|
||||||
}
|
}
|
14
tests/html/mod.rs
Normal file
14
tests/html/mod.rs
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
mod add_favicon;
|
||||||
|
mod check_integrity;
|
||||||
|
mod compose_csp;
|
||||||
|
mod create_metadata_tag;
|
||||||
|
mod embed_srcset;
|
||||||
|
mod get_base_url;
|
||||||
|
mod get_charset;
|
||||||
|
mod get_node_attr;
|
||||||
|
mod get_node_name;
|
||||||
|
mod has_favicon;
|
||||||
|
mod is_icon;
|
||||||
|
mod serialize_document;
|
||||||
|
mod set_node_attr;
|
||||||
|
mod walk_and_embed_assets;
|
153
tests/html/serialize_document.rs
Normal file
153
tests/html/serialize_document.rs
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
mod passing {
    use monolith::html;
    use monolith::opts::Options;

    /// Parses `markup`, serializes the resulting DOM with `options` (and an
    /// empty second argument to both calls), and returns the output as text.
    fn render(markup: &str, options: &Options) -> String {
        let dom = html::html_to_dom(&markup.as_bytes().to_vec(), "".to_string());
        let serialized = html::serialize_document(dom, "".to_string(), options);

        String::from_utf8_lossy(&serialized).into_owned()
    }

    /// A bare DIV is expected to come out wrapped in HTML, HEAD and BODY.
    #[test]
    fn div_as_root_element() {
        let markup = "<div><script src=\"some.js\"></script></div>";
        let options = Options::default();

        assert_eq!(
            render(markup, &options),
            "<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
        );
    }

    /// With `isolate` set, a Content-Security-Policy META tag is expected as
    /// the first child of HEAD, ahead of the document's own tags.
    #[test]
    fn full_page_with_no_html_head_or_body() {
        let markup = "<title>Isolated document</title>\
                      <link rel=\"something\" href=\"some.css\" />\
                      <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                      <div><script src=\"some.js\"></script></div>";
        let mut options = Options::default();
        options.isolate = true;

        assert_eq!(
            render(markup, &options),
            "<html>\
                <head>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:;\"></meta>\
                    <title>Isolated document</title>\
                    <link rel=\"something\" href=\"some.css\">\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                </head>\
                <body>\
                    <div>\
                        <script src=\"some.js\"></script>\
                    </div>\
                </body>\
            </html>"
        );
    }

    /// With `no_css` set, the injected policy is expected to forbid styles.
    #[test]
    fn doctype_and_the_rest_no_html_head_or_body() {
        let markup = "<!doctype html>\
                      <title>Unstyled document</title>\
                      <link rel=\"stylesheet\" href=\"main.css\"/>\
                      <div style=\"display: none;\"></div>";
        let mut options = Options::default();
        options.no_css = true;

        assert_eq!(
            render(markup, &options),
            "<!DOCTYPE html>\
            <html>\
                <head>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
                    <title>Unstyled document</title>\
                    <link rel=\"stylesheet\" href=\"main.css\">\
                </head>\
                <body><div style=\"display: none;\"></div></body>\
            </html>"
        );
    }

    /// With `no_frames` set, the injected policy is expected to forbid frames.
    #[test]
    fn doctype_and_the_rest_no_html_head_or_body_forbid_frames() {
        let markup = "<!doctype html>\
                      <title>Frameless document</title>\
                      <link rel=\"something\"/>\
                      <div><script src=\"some.js\"></script></div>";
        let mut options = Options::default();
        options.no_frames = true;

        assert_eq!(
            render(markup, &options),
            "<!DOCTYPE html>\
            <html>\
                <head>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
                    <title>Frameless document</title>\
                    <link rel=\"something\">\
                </head>\
                <body><div><script src=\"some.js\"></script></div></body>\
            </html>"
        );
    }

    /// Every restricting option enabled at once: the injected policy is
    /// expected to carry one directive group per option.
    #[test]
    fn doctype_and_the_rest_all_forbidden() {
        let markup = "<!doctype html>\
                      <title>no-frame no-css no-js no-image isolated document</title>\
                      <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                      <link rel=\"stylesheet\" href=\"some.css\">\
                      <div>\
                        <script src=\"some.js\"></script>\
                        <img style=\"width: 100%;\" src=\"some.png\" />\
                        <iframe src=\"some.html\"></iframe>\
                      </div>";
        let mut options = Options::default();
        options.isolate = true;
        options.no_css = true;
        options.no_fonts = true;
        options.no_frames = true;
        options.no_js = true;
        options.no_images = true;

        assert_eq!(
            render(markup, &options),
            "<!DOCTYPE html>\
            <html>\
                <head>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
                    <title>no-frame no-css no-js no-image isolated document</title>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                    <link rel=\"stylesheet\" href=\"some.css\">\
                </head>\
                <body>\
                    <div>\
                        <script src=\"some.js\"></script>\
                        <img style=\"width: 100%;\" src=\"some.png\">\
                        <iframe src=\"some.html\"></iframe>\
                    </div>\
                </body>\
            </html>"
        );
    }
}
|
108
tests/html/set_node_attr.rs
Normal file
108
tests/html/set_node_attr.rs
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
mod passing {
    use html5ever::rcdom::{Handle, NodeData};

    use monolith::html;

    /// Overwrites, removes and re-adds `lang` on HTML, and adds `style` to a
    /// BODY that had none, reading each change back through `get_node_attr`.
    /// Five nodes are expected to be visited.
    #[test]
    fn html_lang_and_body_style() {
        let markup = "<!doctype html><html lang=\"en\"><head></head><body></body></html>";
        let dom = html::html_to_dom(&markup.as_bytes().to_vec(), "".to_string());
        let mut total = 0;

        // Counts every node reached; only document and element nodes have
        // their children walked.
        fn visit(node: &Handle, visited: &mut i8) {
            *visited += 1;

            let descend = match &node.data {
                NodeData::Document => true,
                NodeData::Element { name, .. } => {
                    let tag = name.local.as_ref().to_string();

                    match tag.as_str() {
                        "html" => {
                            assert_eq!(html::get_node_attr(node, "lang"), Some(String::from("en")));

                            // Replace the existing value
                            html::set_node_attr(node, "lang", Some(String::from("de")));
                            assert_eq!(html::get_node_attr(node, "lang"), Some(String::from("de")));

                            // Passing None removes the attribute
                            html::set_node_attr(node, "lang", None);
                            assert_eq!(html::get_node_attr(node, "lang"), None);

                            // An empty value is still a present attribute
                            html::set_node_attr(node, "lang", Some(String::new()));
                            assert_eq!(html::get_node_attr(node, "lang"), Some(String::new()));
                        }
                        "body" => {
                            assert_eq!(html::get_node_attr(node, "style"), None);

                            html::set_node_attr(
                                node,
                                "style",
                                Some(String::from("display: none;")),
                            );
                            assert_eq!(
                                html::get_node_attr(node, "style"),
                                Some(String::from("display: none;"))
                            );
                        }
                        _ => (),
                    }

                    true
                }
                _ => false,
            };

            if descend {
                for child in node.children.borrow().iter() {
                    visit(child, visited);
                }
            }
        }

        visit(&dom.document, &mut total);

        assert_eq!(total, 5);
    }

    /// BODY declares `background` twice: the first value is expected to be
    /// read, and after removal no `background` attribute is expected.
    /// Five nodes are expected to be visited.
    #[test]
    fn body_background() {
        let markup = "<!doctype html><html lang=\"en\"><head></head><body background=\"1\" background=\"2\"></body></html>";
        let dom = html::html_to_dom(&markup.as_bytes().to_vec(), "".to_string());
        let mut total = 0;

        // Counts every node reached; only document and element nodes have
        // their children walked.
        fn visit(node: &Handle, visited: &mut i8) {
            *visited += 1;

            let descend = match &node.data {
                NodeData::Document => true,
                NodeData::Element { name, .. } => {
                    let tag = name.local.as_ref().to_string();

                    if tag.as_str() == "body" {
                        assert_eq!(
                            html::get_node_attr(node, "background"),
                            Some(String::from("1"))
                        );

                        html::set_node_attr(node, "background", None);
                        assert_eq!(html::get_node_attr(node, "background"), None);
                    }

                    true
                }
                _ => false,
            };

            if descend {
                for child in node.children.borrow().iter() {
                    visit(child, visited);
                }
            }
        }

        visit(&dom.document, &mut total);

        assert_eq!(total, 5);
    }
}
|
518
tests/html/walk_and_embed_assets.rs
Normal file
518
tests/html/walk_and_embed_assets.rs
Normal file
@ -0,0 +1,518 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use html5ever::serialize::{serialize, SerializeOpts};
|
||||||
|
use reqwest::blocking::Client;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
use monolith::html;
|
||||||
|
use monolith::opts::Options;
|
||||||
|
use monolith::url::EMPTY_IMAGE_DATA_URL;
|
||||||
|
|
||||||
|
/// Walks a document that references no external assets and checks that the
/// serialized result is the input wrapped in HTML/HEAD/BODY with the P tag
/// lowercased.
#[test]
fn basic() {
    let cache = &mut HashMap::new();

    let html: &str = "<div><P></P></div>";
    let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
    let url: Url = Url::parse("http://localhost").unwrap();

    let mut options = Options::default();
    options.silent = true;

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; a per-byte `as char` cast would
    // turn every multi-byte sequence into several unrelated characters.
    assert_eq!(
        String::from_utf8_lossy(&buf),
        "<html><head></head><body><div><p></p></div></body></html>"
    );
}
|
||||||
|
|
||||||
|
/// An IFRAME with an empty `src` is expected to come through the walk with
/// its `src` still empty.
#[test]
fn ensure_no_recursive_iframe() {
    let html = "<div><P></P><iframe src=\"\"></iframe></div>";
    let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
    let url: Url = Url::parse("http://localhost").unwrap();
    let cache = &mut HashMap::new();

    let mut options = Options::default();
    options.silent = true;

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; a per-byte `as char` cast would
    // turn every multi-byte sequence into several unrelated characters.
    assert_eq!(
        String::from_utf8_lossy(&buf),
        "<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
    );
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn ensure_no_recursive_frame() {
|
||||||
|
let html = "<frameset><frame src=\"\"></frameset>";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn no_css() {
|
||||||
|
let html = "\
|
||||||
|
<link rel=\"stylesheet\" href=\"main.css\">\
|
||||||
|
<link rel=\"alternate stylesheet\" href=\"main.css\">\
|
||||||
|
<style>html{background-color: #000;}</style>\
|
||||||
|
<div style=\"display: none;\"></div>\
|
||||||
|
";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_css = true;
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
<link rel=\"stylesheet\">\
|
||||||
|
<link rel=\"alternate stylesheet\">\
|
||||||
|
<style></style>\
|
||||||
|
</head>\
|
||||||
|
<body>\
|
||||||
|
<div></div>\
|
||||||
|
</body>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn no_images() {
|
||||||
|
let html = "<link rel=\"icon\" href=\"favicon.ico\">\
|
||||||
|
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_images = true;
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
format!(
|
||||||
|
"<html>\
|
||||||
|
<head>\
|
||||||
|
<link rel=\"icon\">\
|
||||||
|
</head>\
|
||||||
|
<body>\
|
||||||
|
<div>\
|
||||||
|
<img src=\"{empty_image}\">\
|
||||||
|
</div>\
|
||||||
|
</body>\
|
||||||
|
</html>",
|
||||||
|
empty_image = EMPTY_IMAGE_DATA_URL
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn no_body_background_images() {
|
||||||
|
let html =
|
||||||
|
"<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_images = true;
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"<html><head></head><body></body></html>"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn no_frames() {
|
||||||
|
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_frames = true;
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
</head>\
|
||||||
|
<frameset>\
|
||||||
|
<frame src=\"\">\
|
||||||
|
</frameset>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn no_iframes() {
|
||||||
|
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_frames = true;
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head></head>\
|
||||||
|
<body>\
|
||||||
|
<iframe src=\"\"></iframe>\
|
||||||
|
</body>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn no_js() {
|
||||||
|
let html = "\
|
||||||
|
<div onClick=\"void(0)\">\
|
||||||
|
<script src=\"http://localhost/assets/some.js\"></script>\
|
||||||
|
<script>alert(1)</script>\
|
||||||
|
</div>\
|
||||||
|
";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_js = true;
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head></head>\
|
||||||
|
<body>\
|
||||||
|
<div>\
|
||||||
|
<script></script>\
|
||||||
|
<script></script>\
|
||||||
|
</div>\
|
||||||
|
</body>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keeps_integrity_for_unfamiliar_links() {
|
||||||
|
let html = "<title>Has integrity</title>\
|
||||||
|
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
<title>Has integrity</title>\
|
||||||
|
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\">\
|
||||||
|
</head>\
|
||||||
|
<body></body>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn discards_integrity_for_known_links_nojs_nocss() {
|
||||||
|
let html = "\
|
||||||
|
<title>No integrity</title>\
|
||||||
|
<link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
|
||||||
|
<script integrity=\"\" src=\"some.js\"></script>\
|
||||||
|
";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_css = true;
|
||||||
|
options.no_js = true;
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
<title>No integrity</title>\
|
||||||
|
<link rel=\"stylesheet\">\
|
||||||
|
<script></script>\
|
||||||
|
</head>\
|
||||||
|
<body></body>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn discards_integrity_for_embedded_assets() {
|
||||||
|
let html = "\
|
||||||
|
<title>No integrity</title>\
|
||||||
|
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\"/>\
|
||||||
|
<script integrity=\"sha384-456\" src=\"some.js\"></script>\
|
||||||
|
";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_css = true;
|
||||||
|
options.no_js = true;
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
<title>No integrity</title>\
|
||||||
|
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\">\
|
||||||
|
<script></script>\
|
||||||
|
</head>\
|
||||||
|
<body>\
|
||||||
|
</body>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn removes_unwanted_meta_tags() {
|
||||||
|
let html = "\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
<meta http-equiv=\"Refresh\" content=\"2\"/>\
|
||||||
|
<meta http-equiv=\"Location\" content=\"https://freebsd.org\"/>\
|
||||||
|
</head>\
|
||||||
|
<body>\
|
||||||
|
</body>\
|
||||||
|
</html>\
|
||||||
|
";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_css = true;
|
||||||
|
options.no_frames = true;
|
||||||
|
options.no_js = true;
|
||||||
|
options.no_images = true;
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
<meta content=\"2\">\
|
||||||
|
<meta content=\"https://freebsd.org\">\
|
||||||
|
</head>\
|
||||||
|
<body>\
|
||||||
|
</body>\
|
||||||
|
</html>"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn processes_noscript_tags() {
|
||||||
|
let html = "\
|
||||||
|
<html>\
|
||||||
|
<body>\
|
||||||
|
<noscript>\
|
||||||
|
<img src=\"image.png\" />\
|
||||||
|
</noscript>\
|
||||||
|
</body>\
|
||||||
|
</html>";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_images = true;
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
format!(
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
</head>\
|
||||||
|
<body>\
|
||||||
|
<noscript>\
|
||||||
|
<img src=\"{}\">\
|
||||||
|
</noscript>\
|
||||||
|
</body>\
|
||||||
|
</html>",
|
||||||
|
EMPTY_IMAGE_DATA_URL,
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn preserves_script_type_json() {
|
||||||
|
let html = "<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>\
|
||||||
|
</head>\
|
||||||
|
<body>\
|
||||||
|
</body>\
|
||||||
|
</html>"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
@ -1,5 +1,3 @@
|
|||||||
use crate::html;
|
|
||||||
|
|
||||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
@ -7,29 +5,24 @@ use crate::html;
|
|||||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
#[test]
|
#[cfg(test)]
|
||||||
fn passing_icon() {
|
mod passing {
|
||||||
assert!(html::is_icon("icon"));
|
use monolith::js;
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_shortcut_icon_capitalized() {
|
fn onblur_camelcase() {
|
||||||
assert!(html::is_icon("Shortcut Icon"));
|
assert!(js::attr_is_event_handler("onBlur"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_icon_uppercase() {
|
fn onclick_lowercase() {
|
||||||
assert!(html::is_icon("ICON"));
|
assert!(js::attr_is_event_handler("onclick"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_mask_icon() {
|
fn onclick_camelcase() {
|
||||||
assert!(html::is_icon("mask-icon"));
|
assert!(js::attr_is_event_handler("onClick"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn passing_fluid_icon() {
|
|
||||||
assert!(html::is_icon("fluid-icon"));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
@ -39,12 +32,22 @@ fn passing_fluid_icon() {
|
|||||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
#[test]
|
#[cfg(test)]
|
||||||
fn failing_stylesheet() {
|
mod failing {
|
||||||
assert!(!html::is_icon("stylesheet"));
|
use monolith::js;
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn failing_empty_string() {
|
fn href() {
|
||||||
assert!(!html::is_icon(""));
|
assert!(!js::attr_is_event_handler("href"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn empty_string() {
|
||||||
|
assert!(!js::attr_is_event_handler(""));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn class() {
|
||||||
|
assert!(!js::attr_is_event_handler("class"));
|
||||||
|
}
|
||||||
}
|
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user