Compare commits

..

120 commits

Author SHA1 Message Date
Jakub Jirutka
7c61b462dd disable unnecessary/unused regex features to reduce binary size
This will reduce the monolith binary size by ~15%.
2022-09-20 11:46:26 -04:00
Simone Mosciatti
ef3684025b move to use http instead of https 2022-09-11 14:30:44 -04:00
Simone Mosciatti
db7ee697b3 rewrite small part of the input argument handling
This commit rewrites a small part of the input argument handling, trying
to follow best Rust practices.
We get rid of a variable and of a mutable reference while keeping the
code a bit more concise.
2022-09-11 14:30:44 -04:00
Sunshine
89ce5029b9
add option to blacklist/whitelist domains 2022-09-01 13:35:52 -10:00
dependabot[bot]
54609b10e5
Bump iana-time-zone from 0.1.44 to 0.1.46 (#316)
Bumps [iana-time-zone](https://github.com/strawlab/iana-time-zone) from 0.1.44 to 0.1.46.
- [Release notes](https://github.com/strawlab/iana-time-zone/releases)
- [Changelog](https://github.com/strawlab/iana-time-zone/blob/main/CHANGELOG.md)
- [Commits](https://github.com/strawlab/iana-time-zone/compare/0.1.44...v0.1.46)

---
updated-dependencies:
- dependency-name: iana-time-zone
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-08-31 11:35:38 -10:00
Sunshine
013d93bacc
update 3rd-party dependencies and bump version number 2022-08-14 05:12:39 -10:00
Sunshine
0df8613789
Rewrite part of function retrieve_asset, include support for brotli and deflate (#312)
do not crash the app if reqwest throws, add support for deflate & brotli
2022-08-06 19:07:39 -10:00
Sunshine
68a1531a11
Update packages (#313)
update dependencies
2022-08-06 18:21:53 -10:00
Sunshine
99c3be1804
Merge pull request #308 from Y2Z/dependabot/cargo/tokio-1.16.1
Bump tokio from 1.12.0 to 1.16.1
2022-08-06 17:07:18 -10:00
Sunshine
80559e7224
Merge pull request #309 from Y2Z/dependabot/cargo/regex-1.5.5
Bump regex from 1.5.4 to 1.5.5
2022-08-06 16:56:18 -10:00
dependabot[bot]
c5c5f1ca44
Bump regex from 1.5.4 to 1.5.5
Bumps [regex](https://github.com/rust-lang/regex) from 1.5.4 to 1.5.5.
- [Release notes](https://github.com/rust-lang/regex/releases)
- [Changelog](https://github.com/rust-lang/regex/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-lang/regex/compare/1.5.4...1.5.5)

---
updated-dependencies:
- dependency-name: regex
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-06-06 21:07:06 +00:00
dependabot[bot]
de6a13a884
Bump tokio from 1.12.0 to 1.16.1
Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.12.0 to 1.16.1.
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.12.0...tokio-1.16.1)

---
updated-dependencies:
- dependency-name: tokio
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-06-06 19:44:19 +00:00
Sunshine
ef16355f9f
Merge pull request #303 from timoteostewart/master
fix typo 'non-standart' to 'non-standard'
2022-03-17 04:21:16 -04:00
Tim Stewart
a4dc0ed9b4
fix typo 'non-standart' to 'non-standard' 2022-03-16 17:54:48 -05:00
Sunshine
cd0e366979
Merge pull request #301 from liamwarfield/patch-1
Updated monk project link
2022-02-22 15:22:33 -10:00
Liam Warfield
d4c6c458f9
Updated monk project link
The monk project has recently moved to GitHub! Just changing the link here to the new repo.
2022-02-22 14:17:40 -07:00
Sunshine
c9970b3a8e
Merge pull request #292 from snshn/include-unsafe-eval-origin-for-isolated-documents
Include unsafe-eval origin for isolated documents
2021-12-05 20:26:44 -10:00
Sunshine
404d322b99
make tests pass for newly added 'unsafe-eval' origin addition 2021-12-05 20:16:37 -10:00
Sunshine
1b353d0b46
include unsafe-eval origin for isolated documents 2021-12-05 20:09:26 -10:00
Sunshine
f920a5e4d6
Merge pull request #290 from matildepark/patch-1
README: remove duplicate macports instructions
2021-11-10 20:33:35 -10:00
matildepark
d3ca1ecad3
README: remove duplicate macports instructions 2021-11-10 23:10:31 -05:00
Sunshine
9e057472c6
Update README.md 2021-10-20 16:21:55 -10:00
Sunshine
d453145bf8
Merge pull request #288 from snshn/update-markdown-files
Update Markdown files
2021-10-20 15:54:07 -10:00
Sunshine
8c131d649f
update Markdown files 2021-10-20 15:46:08 -10:00
Sunshine
a221fdb368
Merge pull request #287 from snshn/ci-ignore-some-files
Update README files and set CI to ignore irrelevant paths
2021-10-20 15:40:43 -10:00
Sunshine
15dd82e300
update README files, set CI to ignore irrelevant paths 2021-10-20 15:31:54 -10:00
Sunshine
de492caaa5
Merge pull request #286 from snshn/move-test-data
Move test data files under _data_
2021-10-17 22:51:22 -10:00
Sunshine
9096447c70
move test data files under _data_ 2021-10-17 22:46:06 -10:00
Sunshine
354340db86
Merge pull request #285 from snshn/use-percent-encoding-crate
Offload percent decoding to percent-encoding crate
2021-10-17 22:32:10 -10:00
Sunshine
900dd8d163
offload percent decoding to percent-encoding crate 2021-10-17 22:26:11 -10:00
Sunshine
a11c4496b0
Merge pull request #284 from snshn/move-tests-to-upper-level
Get rid of macros, move tests out of src
2021-10-16 21:39:53 -10:00
Sunshine
dd33b16876
Merge pull request #283 from snshn/formatting
Format README.md and annotate workflows
2021-10-16 21:16:53 -10:00
Sunshine
2cc1870033
get rid of macros, move tests out of src 2021-10-16 21:16:37 -10:00
Sunshine
d41e6c041b
format README.md and annotate workflows 2021-10-16 18:48:32 -10:00
Sunshine
460a461373
Update README.md 2021-07-14 00:09:41 -10:00
Sunshine
1e6e87b6aa
Merge pull request #277 from Oliver-Hanikel/master
Reduce size of Docker image
2021-07-11 11:45:18 -10:00
Oliver Hanikel
54094270b3 Update run-in-container.sh 2021-07-11 20:07:48 +02:00
Oliver Hanikel
e6cf367e23 reduce size of docker image 2021-07-11 20:00:39 +02:00
Sunshine
e8437ecb28
Update README.md 2021-07-10 16:41:30 -10:00
Sunshine
543bebbd8d
Merge pull request #275 from snshn/improve-readme-code-snippets
Remove dollar signs from code snippets
2021-07-10 16:40:20 -10:00
Sunshine
dc6c0200bc
remove dollar sign from code snippets 2021-07-10 16:32:56 -10:00
Sunshine
04bdb3072f
Update README.md 2021-07-08 13:14:37 -10:00
Sunshine
a9228f0522
Merge pull request #274 from snshn/arm64-cd-job
Downgrade AArch64 CD job from Ubuntu 20.04 to Ubuntu 18.04
2021-07-06 15:29:55 -10:00
Sunshine
aae68c4c82
downgrade AArch64 CD job from Ubuntu 20.04 to Ubuntu 18.04 2021-07-06 14:41:56 -10:00
Sunshine
dd23826205
Merge pull request #273 from herbygillot/patch-1
README: add MacPorts install instructions
2021-07-04 21:16:18 -10:00
Herby Gillot
781f4cd3b5
README: add MacPorts install instructions 2021-07-05 03:07:55 -04:00
Sunshine
6826b59ab9
Merge pull request #272 from snshn/new-release
New release (2.6.1)
2021-07-03 19:39:32 -10:00
Sunshine
2be725eeb5
bump version number (2.6.0 -> 2.6.1) 2021-07-03 19:33:09 -10:00
Sunshine
dd2e9ca2e5
update crates 2021-07-03 19:31:55 -10:00
Sunshine
50bccae476
Merge pull request #267 from snshn/aarch64-binary
Add GNU/Linux AArch64 CD job
2021-07-03 00:15:04 -10:00
Sunshine
b3bcb1d85b
add GNU/Linux AArch64 CD job 2021-07-03 00:10:14 -10:00
Sunshine
c58d044459
Merge pull request #271 from snshn/fix-charset-detection-mechanism
Fix charset detection logic
2021-07-02 21:47:56 -10:00
Sunshine
eeaea0df16
fix use of wrong charset 2021-07-02 21:35:06 -10:00
Sunshine
2539aac4c0
Merge pull request #265 from snshn/version-bump
Bump version (2.5.0 -> 2.6.0)
2021-06-08 13:16:40 -10:00
Sunshine
03b9af543a
bump version (2.5.0 -> 2.6.0) 2021-06-08 13:09:50 -10:00
Sunshine
1bb8141021
Merge pull request #264 from snshn/fixes
Fixes
2021-06-08 13:04:57 -10:00
Sunshine
4bc8043f0f
account for charset when creating data URLs 2021-06-08 12:54:16 -10:00
Sunshine
5effa38392
use proper charset detection for linked assets 2021-06-08 12:25:19 -10:00
Sunshine
125aeeec3b
improve validation of charset found in HTML, use genuinely infinite timeout 2021-06-08 11:50:46 -10:00
Sunshine
c938ba6a2f
modify proper attribute for (i)frame elements 2021-06-08 04:49:14 -10:00
Sunshine
f354affc36
Merge pull request #263 from snshn/save-with-custom-charset
Add option for saving document using custom encoding
2021-06-08 04:15:49 -10:00
Sunshine
7686b2ea64
avoid excessive parsing of HTML into DOM 2021-06-08 03:57:28 -10:00
Sunshine
b29b9a6a7c
add option for saving document using custom encoding 2021-06-08 03:39:27 -10:00
Sunshine
cbda57cfa8
Merge pull request #262 from snshn/support-more-encodings
Add support for wider range of charsets
2021-06-08 02:39:24 -10:00
Sunshine
b8aa545e8c
add support for wider range of charsets 2021-06-08 02:30:15 -10:00
Sunshine
22a031af5d
Merge pull request #256 from snshn/more-tests-fixes-and-improvements
More tests, fixes, improvements
2021-06-02 04:06:37 -10:00
Sunshine
6e6a60b305
Merge branch 'master' into more-tests-fixes-and-improvements 2021-06-02 04:01:41 -10:00
Sunshine
77d6022d84
bump version (2.4.1 -> 2.5.0) 2021-06-02 04:00:18 -10:00
Sunshine
5db19d1a3e
update dependencies 2021-06-02 03:58:28 -10:00
Sunshine
a6e891b3c5
add more tests 2021-06-02 03:41:41 -10:00
Sunshine
d7a82a008b
Merge pull request #260 from snshn/ie-css-hack-fix
Remove optional trailing space from CSS idents
2021-05-28 23:04:34 -10:00
Sunshine
2369a4dd3c
remove optional trailing space from CSS idents 2021-05-28 12:03:19 -10:00
Sunshine
d27e53fb36
Merge pull request #259 from snshn/related-project-monk
Add Monk to related projects in README.md
2021-05-24 10:54:11 -10:00
Sunshine
2cb51477d2
add Monk to related projects in README.md 2021-05-24 01:47:19 -10:00
Sunshine
a308a20411
simplify code of CLI tests 2021-03-15 20:10:50 -10:00
Sunshine
a6ddf1c13a
simplify code responsible for processing CSS 2021-03-14 19:42:57 -10:00
Sunshine
8256d17efd
Merge pull request #253 from snshn/unwrap-noscript
Make possible to unwrap NOSCRIPT nodes
2021-03-11 22:43:28 -10:00
Sunshine
efa12935ba
Merge pull request #254 from snshn/no-containers-md
Get rid of containers.md (now part of README.md)
2021-03-11 22:39:49 -10:00
Sunshine
7126a98023
Merge pull request #255 from snshn/pkgsrc
Add installation instructions using pkgsrc
2021-03-11 22:38:33 -10:00
Sunshine
c7ee3ec6e2
get rid of containers.md (now part of README.md) 2021-03-11 22:27:44 -10:00
Sunshine
c4218031e2
add installation instructions using pkgsrc 2021-03-11 22:26:32 -10:00
Sunshine
6f918f6c1c
make possible to unwrap NOSCRIPT nodes 2021-03-11 18:18:39 -10:00
Sunshine
6ecda080e8
Merge pull request #252 from snshn/revamp
Revamp codebase
2021-03-11 14:25:10 -10:00
Sunshine
2e86ee67a5
revamp codebase 2021-03-11 14:15:18 -10:00
Sunshine
359616b901
Update README.md 2021-03-09 16:04:32 -10:00
Sunshine
ea2cdab330
Update README.md 2021-03-09 15:52:23 -10:00
Sunshine
4434823c46
Update README.md 2021-03-09 14:49:10 -10:00
Sunshine
e0a78ffc9d
Update README.md 2021-03-09 13:31:15 -10:00
Sunshine
cbbb297473
Merge pull request #251 from snshn/bump-version-again
Bump version number to 2.4.1
2021-03-09 02:17:17 -10:00
Sunshine
98ddb821a5
bump version number 2021-03-09 02:07:07 -10:00
Sunshine
be097b1d4e
Merge pull request #250 from snshn/alternate-stylesheets
Embed alternate stylesheets
2021-03-09 01:58:08 -10:00
Sunshine
325688acf5
add test for alternate stylesheets 2021-03-09 01:48:41 -10:00
Sunshine
11207d49d2
embed alternate stylesheets 2021-03-09 01:46:15 -10:00
Sunshine
96da64e193
Merge pull request #247 from snshn/cc0
Change project license to CC0 1.0 Universal (CC0 1.0)
2021-03-01 13:28:49 -10:00
Sunshine
8a62a51210
Merge pull request #248 from snshn/update-container-instructions
Running in container instructions update
2021-02-28 23:24:10 -10:00
Sunshine
a6ac1df93d
running in container instructions update 2021-02-28 21:46:38 -10:00
Sunshine
49e81149df
switch license to CC0-1.0 2021-02-28 19:54:46 -10:00
Sunshine
a3516b2ae9
Merge pull request #245 from snshn/change-meta-charset-to-utf-8
Forcefully set document's charset to UTF-8
2021-02-23 23:48:49 -10:00
Sunshine
385301bf16
clean up unused code 2021-02-23 23:39:51 -10:00
Sunshine
4921a70dda
Merge branch 'master' into change-meta-charset-to-utf-8 2021-02-23 23:38:03 -10:00
Sunshine
e0273c664a
forcefully set document's charset to UTF-8 2021-02-23 23:35:35 -10:00
Sunshine
6d629bfd4a
Merge pull request #244 from snshn/process-noscript
Process contents of NOSCRIPT tags
2021-02-22 20:13:26 -10:00
Sunshine
ae9d78a891
process contents of NOSCRIPT tags 2021-02-22 19:42:39 -10:00
Sunshine
0f55fb3c49
Merge pull request #243 from snshn/fix-embedding-picture-srcset
Fix embedding of srcset assets for PICTURE nodes
2021-02-22 16:27:22 -10:00
Sunshine
e41fd6a1c6
fix embedding of srcset for PICTURE nodes 2021-02-22 16:21:12 -10:00
Sunshine
eaf662bb3b
Update README.md 2021-02-15 15:38:06 -10:00
Sunshine
fa71f6a42c
Merge pull request #240 from snshn/color
Add color to asset download log
2021-01-30 10:48:35 -10:00
Sunshine
9a27c6c5ee
add color to asset download log 2021-01-29 20:24:35 -10:00
Sunshine
4ad07c0519
Merge pull request #239 from snshn/update-crates
Update dependencies
2021-01-29 17:27:43 -10:00
Sunshine
e78405f2ae
update dependencies 2021-01-29 17:19:38 -10:00
Sunshine
e81462be41
Merge pull request #237 from snshn/choco
Add Chocolatey spec file
2020-12-31 15:32:27 -10:00
Sunshine
b972d717ce
add chocolatey spec 2020-12-31 15:30:41 -10:00
Sunshine
edb679d2b3
Merge pull request #236 from snshn/pipe-in-target-test
Add test for stdin pipe
2020-12-31 14:44:57 -10:00
Sunshine
2e1462a953
add test for stdin pipe 2020-12-31 14:38:31 -10:00
Sunshine
57883b84b2
Merge pull request #235 from snshn/allow-empty-user-agent-string
Make it possible to specify an empty user-agent string
2020-12-31 13:02:35 -10:00
Sunshine
4fa2eda983
make it possible to specify an empty user-agent string 2020-12-31 12:57:22 -10:00
Sunshine
028187a31e
Merge pull request #234 from snshn/update-dependencies
Update crates
2020-12-28 12:11:25 -10:00
Sunshine
c469c30cbd
update crates 2020-12-28 12:04:27 -10:00
Sunshine
6de36243f9
Fix armhf build in cd.yml 2020-12-27 05:52:47 -10:00
Sunshine
4f162d0cc0
Update README.md 2020-12-25 22:59:24 -10:00
99 changed files with 5288 additions and 4048 deletions

View file

@ -3,6 +3,17 @@ name: GNU/Linux
on: on:
push: push:
branches: [ master ] branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'docs/'
- 'snap/'
- '.adr-dir'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs: jobs:
build: build:
@ -17,6 +28,8 @@ jobs:
steps: steps:
- run: git config --global core.autocrlf false - run: git config --global core.autocrlf false
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Build - name: Build
run: cargo build --all --locked --verbose run: cargo build --all --locked --verbose

View file

@ -3,6 +3,17 @@ name: macOS
on: on:
push: push:
branches: [ master ] branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'docs/'
- 'snap/'
- '.adr-dir'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs: jobs:
build: build:
@ -17,6 +28,8 @@ jobs:
steps: steps:
- run: git config --global core.autocrlf false - run: git config --global core.autocrlf false
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Build - name: Build
run: cargo build --all --locked --verbose run: cargo build --all --locked --verbose

View file

@ -3,6 +3,17 @@ name: Windows
on: on:
push: push:
branches: [ master ] branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'docs/'
- 'snap/'
- '.adr-dir'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs: jobs:
build: build:
@ -17,6 +28,8 @@ jobs:
steps: steps:
- run: git config --global core.autocrlf false - run: git config --global core.autocrlf false
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Build - name: Build
run: cargo build --all --locked --verbose run: cargo build --all --locked --verbose

View file

@ -1,4 +1,4 @@
# CD GitHub Actions workflow for Monolith # CD GitHub Actions workflow for monolith
name: CD name: CD
@ -13,10 +13,13 @@ jobs:
runs-on: windows-2019 runs-on: windows-2019
steps: steps:
- run: git config --global core.autocrlf false - run: git config --global core.autocrlf false
- name: Checkout the repository - name: Checkout the repository
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Build the executable - name: Build the executable
run: cargo build --release run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0 - uses: Shopify/upload-to-release@1.0.0
with: with:
name: monolith.exe name: monolith.exe
@ -28,24 +31,27 @@ jobs:
steps: steps:
- name: Checkout the repository - name: Checkout the repository
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Prepare cross-platform environment - name: Prepare cross-platform environment
run: | run: |
sudo mkdir -p /cross-build-arm sudo mkdir /cross-build
sudo touch /etc/apt/sources.list.d/armhf.list sudo touch /etc/apt/sources.list.d/armhf.list
echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/armhf.list echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/armhf.list
sudo apt-get update sudo apt-get update
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
sudo apt-get download libssl1.1:armhf libssl-dev:armhf sudo apt-get download libssl1.1:armhf libssl-dev:armhf
sudo dpkg -x libssl1.1*.deb /cross-build-arm sudo dpkg -x libssl1.1*.deb /cross-build
sudo dpkg -x libssl-dev*.deb /cross-build-arm sudo dpkg -x libssl-dev*.deb /cross-build
rustup target add arm-unknown-linux-gnueabihf rustup target add arm-unknown-linux-gnueabihf
echo "::set-env name=C_INCLUDE_PATH::/cross-build-arm/usr/include" echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV
echo "::set-env name=OPENSSL_INCLUDE_DIR::/cross-build-arm/usr/include/arm-linux-gnueabihf" echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/arm-linux-gnueabihf" >> $GITHUB_ENV
echo "::set-env name=OPENSSL_LIB_DIR::/cross-build-arm/usr/lib/arm-linux-gnueabihf" echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
echo "::set-env name=PKG_CONFIG_ALLOW_CROSS::1" echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
echo "::set-env name=RUSTFLAGS::-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build-arm/usr/lib/arm-linux-gnueabihf -L/cross-build-arm/lib/arm-linux-gnueabihf" echo "RUSTFLAGS=-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build/usr/lib/arm-linux-gnueabihf -L/cross-build/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
- name: Build the executable - name: Build the executable
run: cargo build --release --target=arm-unknown-linux-gnueabihf run: cargo build --release --target=arm-unknown-linux-gnueabihf
- name: Attach artifact to the release - name: Attach artifact to the release
uses: Shopify/upload-to-release@1.0.0 uses: Shopify/upload-to-release@1.0.0
with: with:
@ -53,13 +59,48 @@ jobs:
path: target/arm-unknown-linux-gnueabihf/release/monolith path: target/arm-unknown-linux-gnueabihf/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }} repo-token: ${{ secrets.GITHUB_TOKEN }}
gnu_linux_aarch64:
runs-on: ubuntu-18.04
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Prepare cross-platform environment
run: |
sudo mkdir /cross-build
sudo touch /etc/apt/sources.list.d/arm64.list
echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/arm64.list
sudo apt-get update
sudo apt-get install -y gcc-aarch64-linux-gnu libc6-arm64-cross libc6-dev-arm64-cross
sudo apt-get download libssl1.1:arm64 libssl-dev:arm64
sudo dpkg -x libssl1.1*.deb /cross-build
sudo dpkg -x libssl-dev*.deb /cross-build
rustup target add aarch64-unknown-linux-gnu
echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV
echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/aarch64-linux-gnu" >> $GITHUB_ENV
echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV
echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
echo "RUSTFLAGS=-C linker=aarch64-linux-gnu-gcc -L/usr/aarch64-linux-gnu/lib -L/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV
- name: Build the executable
run: cargo build --release --target=aarch64-unknown-linux-gnu
- name: Attach artifact to the release
uses: Shopify/upload-to-release@1.0.0
with:
name: monolith-gnu-linux-aarch64
path: target/aarch64-unknown-linux-gnu/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }}
gnu_linux_x86_64: gnu_linux_x86_64:
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
steps: steps:
- name: Checkout the repository - name: Checkout the repository
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Build the executable - name: Build the executable
run: cargo build --release run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0 - uses: Shopify/upload-to-release@1.0.0
with: with:
name: monolith-gnu-linux-x86_64 name: monolith-gnu-linux-x86_64

View file

@ -1,8 +1,21 @@
# CI GitHub Actions workflow for monolith
name: CI name: CI
on: on:
pull_request: pull_request:
branches: [ master ] branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'docs/'
- 'snap/'
- '.adr-dir'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs: jobs:
build_and_test: build_and_test:
@ -21,11 +34,15 @@ jobs:
steps: steps:
- run: git config --global core.autocrlf false - run: git config --global core.autocrlf false
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Build - name: Build
run: cargo build --all --locked --verbose run: cargo build --all --locked --verbose
- name: Run tests - name: Run tests
run: cargo test --all --locked --verbose run: cargo test --all --locked --verbose
- name: Check code formatting - name: Check code formatting
run: | run: |
rustup component add rustfmt rustup component add rustfmt

1850
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
[package] [package]
name = "monolith" name = "monolith"
version = "2.4.0" version = "2.6.2"
authors = [ authors = [
"Sunshine <sunshine@uberspace.net>", "Sunshine <sunshine@uberspace.net>",
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>", "Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
@ -19,24 +19,30 @@ include = [
"src/*.rs", "src/*.rs",
"Cargo.toml", "Cargo.toml",
] ]
license = "Unlicense" license = "CC0-1.0"
license-file = "LICENSE"
[dependencies] [dependencies]
base64 = "0.13.0" atty = "0.2.14" # Used for highlighting network errors
chrono = "0.4.19" # Used for formatting creation timestamp base64 = "0.13.0" # Used for integrity attributes
clap = "2.33.3" chrono = "0.4.20" # Used for formatting creation timestamp
cssparser = "0.27.2" clap = "3.2.16"
cssparser = "0.29.6"
encoding_rs = "0.8.31"
html5ever = "0.24.1" html5ever = "0.24.1"
regex = "1.4.2" # Used for parsing srcset percent-encoding = "2.1.0"
sha2 = "0.9.2" # Used for calculating checksums during integrity checks sha2 = "0.10.2" # Used for calculating checksums during integrity checks
url = "2.2.0" url = "2.2.2"
# Used for parsing srcset and NOSCRIPT
[dependencies.regex]
version = "1.6.0"
default-features = false
features = ["std", "perf-dfa", "unicode-perl"]
[dependencies.reqwest] [dependencies.reqwest]
version = "0.10.9" version = "0.11.11"
default-features = false default-features = false
features = ["default-tls", "blocking", "gzip"] features = ["default-tls", "blocking", "gzip", "brotli", "deflate"]
[dev-dependencies] [dev-dependencies]
assert_cmd = "1.0.1" assert_cmd = "2.0.4"
tempfile = "3.1.0"

View file

@ -1,18 +1,22 @@
FROM rust FROM ekidd/rust-musl-builder as builder
WORKDIR /usr/local/src/ RUN curl -L -o monolith.tar.gz $(curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
RUN curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
| grep "tarball_url.*\"," \ | grep "tarball_url.*\"," \
| cut -d '"' -f 4 \ | cut -d '"' -f 4)
| wget -qi - -O monolith.tar.gz
RUN tar xfz monolith.tar.gz \ RUN tar xfz monolith.tar.gz \
&& mv Y2Z-monolith-* monolith \ && mv Y2Z-monolith-* monolith \
&& rm monolith.tar.gz && rm monolith.tar.gz
WORKDIR /usr/local/src/monolith WORKDIR monolith/
RUN ls -a
RUN make install RUN make install
FROM alpine
RUN apk update && \
apk add --no-cache openssl && \
rm -rf "/var/cache/apk/*"
COPY --from=builder /home/rust/.cargo/bin/monolith /usr/bin/monolith
WORKDIR /tmp WORKDIR /tmp
CMD ["/usr/local/cargo/bin/monolith"] ENTRYPOINT ["/usr/bin/monolith"]

137
LICENSE
View file

@ -1,24 +1,121 @@
This is free and unencumbered software released into the public domain. Creative Commons Legal Code
Anyone is free to copy, modify, publish, use, compile, sell, or CC0 1.0 Universal
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
of this software dedicate any and all copyright interest in the LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
software to the public domain. We make this dedication for the benefit ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
of the public at large and to the detriment of our heirs and INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
successors. We intend this dedication to be an overt act of REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
relinquishment in perpetuity of all present and future rights to this PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
software under copyright law. THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
HEREUNDER.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, Statement of Purpose
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org> The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator
and subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for
the purpose of contributing to a commons of creative, cultural and
scientific works ("Commons") that the public can reliably and without fear
of later claims of infringement build upon, modify, incorporate in other
works, reuse and redistribute as freely as possible in any form whatsoever
and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free
culture and the further production of creative, cultural and scientific
works, or to gain reputation or greater distribution for their Work in
part through the use and efforts of others.
For these and/or other purposes and motivations, and without any
expectation of additional consideration or compensation, the person
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
is an owner of Copyright and Related Rights in the Work, voluntarily
elects to apply CC0 to the Work and publicly distribute the Work under its
terms, with knowledge of his or her Copyright and Related Rights in the
Work and the meaning and intended legal effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not
limited to, the following:
i. the right to reproduce, adapt, distribute, perform, display,
communicate, and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or
likeness depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data
in a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation
thereof, including any amended or successor version of such
directive); and
vii. other similar, equivalent or corresponding rights throughout the
world based on applicable law or treaty, and any national
implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention
of, applicable law, Affirmer hereby overtly, fully, permanently,
irrevocably and unconditionally waives, abandons, and surrenders all of
Affirmer's Copyright and Related Rights and associated claims and causes
of action, whether now known or unknown (including existing as well as
future claims and causes of action), in the Work (i) in all territories
worldwide, (ii) for the maximum duration provided by applicable law or
treaty (including future time extensions), (iii) in any current or future
medium and for any number of copies, and (iv) for any purpose whatsoever,
including without limitation commercial, advertising or promotional
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
member of the public at large and to the detriment of Affirmer's heirs and
successors, fully intending that such Waiver shall not be subject to
revocation, rescission, cancellation, termination, or any other legal or
equitable action to disrupt the quiet enjoyment of the Work by the public
as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason
be judged legally invalid or ineffective under applicable law, then the
Waiver shall be preserved to the maximum extent permitted taking into
account Affirmer's express Statement of Purpose. In addition, to the
extent the Waiver is so judged Affirmer hereby grants to each affected
person a royalty-free, non transferable, non sublicensable, non exclusive,
irrevocable and unconditional license to exercise Affirmer's Copyright and
Related Rights in the Work (i) in all territories worldwide, (ii) for the
maximum duration provided by applicable law or treaty (including future
time extensions), (iii) in any current or future medium and for any number
of copies, and (iv) for any purpose whatsoever, including without
limitation commercial, advertising or promotional purposes (the
"License"). The License shall be deemed effective as of the date CC0 was
applied by Affirmer to the Work. Should any part of the License for any
reason be judged legally invalid or ineffective under applicable law, such
partial invalidity or ineffectiveness shall not invalidate the remainder
of the License, and in such case Affirmer hereby affirms that he or she
will not (i) exercise any of his or her remaining Copyright and Related
Rights in the Work or (ii) assert any associated claims and causes of
action with respect to the Work, in either case contrary to Affirmer's
express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the present or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the
Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.

View file

@ -10,7 +10,7 @@ build:
test: build test: build
@cargo test --locked @cargo test --locked
@cargo fmt --all -- --check @cargo fmt --all -- --check
.PHONY: test_code_formatting .PHONY: test
lint: lint:
@cargo fmt --all -- @cargo fmt --all --

134
README.md
View file

@ -1,6 +1,6 @@
[![Monolith Build Status for GNU/Linux](https://github.com/Y2Z/monolith/workflows/GNU%2FLinux/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux) [![monolith build status on GNU/Linux](https://github.com/Y2Z/monolith/workflows/GNU%2FLinux/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux)
[![Monolith Build Status for macOS](https://github.com/Y2Z/monolith/workflows/macOS/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS) [![monolith build status on macOS](https://github.com/Y2Z/monolith/workflows/macOS/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS)
[![Monolith Build Status for Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows) [![monolith build status on Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
``` ```
_____ ______________ __________ ___________________ ___ _____ ______________ __________ ___________________ ___
@ -18,49 +18,104 @@ Unlike the conventional “Save page as”, `monolith` not only saves the target
If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available. If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
--------------------------------------------------- ---------------------------------------------------
## Installation ## Installation
### Using Cargo #### Using [Cargo](https://crates.io/crates/monolith)
$ cargo install monolith
#### Via Homebrew (on macOS and GNU/Linux) ```console
$ brew install monolith cargo install monolith
```
#### Using Snapcraft (on GNU/Linux) #### Via [Homebrew](https://formulae.brew.sh/formula/monolith) (macOS and GNU/Linux)
$ snap install monolith
#### Using Ports collection (on FreeBSD and TrueOS) ```console
$ cd /usr/ports/www/monolith/ brew install monolith
$ make install clean ```
#### Using pre-built binaries (Windows, ARM-based devices, etc) #### Via [MacPorts](https://ports.macports.org/port/monolith/summary) (macOS)
Every [release](https://github.com/Y2Z/monolith/releases) contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standart CPU architecture.
#### From source ```console
sudo port install monolith
```
Dependency: `libssl-dev` #### Using [Snapcraft](https://snapcraft.io/monolith) (GNU/Linux)
$ git clone https://github.com/Y2Z/monolith.git ```console
$ cd monolith snap install monolith
$ make install ```
#### Using [FreeBSD packages](https://svnweb.freebsd.org/ports/head/www/monolith/) (FreeBSD)
```console
pkg install monolith
```
#### Using [FreeBSD ports](https://www.freshports.org/www/monolith/) (FreeBSD)
```console
cd /usr/ports/www/monolith/
make install clean
```
#### Using [pkgsrc](https://pkgsrc.se/www/monolith) (NetBSD, OpenBSD, Haiku, etc)
```console
cd /usr/pkgsrc/www/monolith
make install clean
```
#### Using [containers](https://www.docker.com/)
```console
docker build -t y2z/monolith .
sudo install -b dist/run-in-container.sh /usr/local/bin/monolith
```
#### From [source](https://github.com/Y2Z/monolith)
Dependency: `libssl`
```console
git clone https://github.com/Y2Z/monolith.git
cd monolith
make install
```
#### Using [pre-built binaries](https://github.com/Y2Z/monolith/releases) (Windows, ARM-based devices, etc)
Every release contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standard CPU architecture.
#### Using Containers
The guide can be found [here](docs/containers.md)
--------------------------------------------------- ---------------------------------------------------
## Usage ## Usage
$ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
```console
monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
```
```console
cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
```
--------------------------------------------------- ---------------------------------------------------
## Options ## Options
- `-a`: Exclude audio sources - `-a`: Exclude audio sources
- `-b`: Use custom base URL - `-b`: Use custom `base URL`
- `-c`: Exclude CSS - `-c`: Exclude CSS
- `-C`: Save document using custom `charset`
- `-d`: Allow retrieving assets only from specified `domain(s)`
- `-e`: Ignore network errors - `-e`: Ignore network errors
- `-E`: Avoid retrieving assets located within specified domains
- `-f`: Omit frames - `-f`: Omit frames
- `-F`: Exclude web fonts - `-F`: Exclude web fonts
- `-i`: Remove images - `-i`: Remove images
@ -68,36 +123,53 @@ The guide can be found [here](docs/containers.md)
- `-j`: Exclude JavaScript - `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates - `-k`: Accept invalid X.509 (TLS) certificates
- `-M`: Don't add timestamp and URL information - `-M`: Don't add timestamp and URL information
- `-o`: Write output to file - `-n`: Extract contents of NOSCRIPT elements
- `-o`: Write output to `file` (use “-” for STDOUT)
- `-s`: Be quiet - `-s`: Be quiet
- `-t`: Adjust network request timeout - `-t`: Adjust `network request timeout`
- `-u`: Provide custom User-Agent - `-u`: Provide custom `User-Agent`
- `-v`: Exclude videos - `-v`: Exclude videos
--------------------------------------------------- ---------------------------------------------------
## Proxies ## Proxies
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables. Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
--------------------------------------------------- ---------------------------------------------------
## Contributing ## Contributing
Please open an issue if something is wrong, that helps make this project better. Please open an issue if something is wrong, that helps make this project better.
--------------------------------------------------- ---------------------------------------------------
## Related projects ## Related projects
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
- `Pagesaver`: https://github.com/distributed-mind/pagesaver - Monolith Chrome Extension: https://github.com/rhysd/monolith-of-web
- `Personal WayBack Machine`: https://github.com/popey/pwbm - Pagesaver: https://github.com/distributed-mind/pagesaver
- `Hako`: https://github.com/dmpop/hako - Personal WayBack Machine: https://github.com/popey/pwbm
- Hako: https://github.com/dmpop/hako
- Monk: https://github.com/monk-dev/monk
--------------------------------------------------- ---------------------------------------------------
## License ## License
The Unlicense
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
This software is distributed without any warranty.
--------------------------------------------------- ---------------------------------------------------
<!-- Microtext --> <!-- Microtext -->
<sub>Keep in mind that `monolith` is not aware of your browser's session</sub> <sub>Keep in mind that `monolith` is not aware of your browser's session</sub>

10
dist/run-in-container.sh vendored Normal file
View file

@ -0,0 +1,10 @@
#!/bin/sh
# Runs monolith inside a container, forwarding every command-line argument
# to the containerized program. Uses podman when it is installed and falls
# back to docker otherwise.

DOCKER=docker
PROG_NAME=monolith
# Container image references must be lowercase; an uppercase organization
# name is rejected by both docker and podman as an invalid reference.
ORG_NAME=y2z

# Redirection order matters: stdout has to be pointed at /dev/null BEFORE
# stderr is duplicated onto it, otherwise stderr still reaches the terminal.
# `command -v` is the POSIX way to probe for an executable (`which` is not
# guaranteed to exist).
if command -v podman > /dev/null 2>&1; then
    DOCKER=podman
fi

"$DOCKER" run --rm "$ORG_NAME/$PROG_NAME" "$@"

View file

@ -1,15 +0,0 @@
1. Run `docker build -t y2z/monolith .` to create a Docker image
2. Create a file named `monolith` which contains:
```sh
#!/bin/sh
docker run --rm \
y2z/monolith \
monolith \
"$@"
```
3. Make the file executable (`chmod +x monolith`) and include it into your `$PATH`
4. Now you should be able to run a containerized build of monolith like this:
`monolith -I https://github.com > document.html`

View file

@ -1,10 +1,23 @@
# Web apps that can be saved with Monolith # Web apps that can be saved with Monolith
These apps retain most or all of their functionality when saved with Monolith These apps retain all or most of their functionality when saved with Monolith:
|Converse|https://conversejs.org|
|:--|:--| ## Converse
| Website | https://conversejs.org |
|:-----------------------|:--------------------------------------------------------------------|
| Description | An XMPP client built using web technologies | | Description | An XMPP client built using web technologies |
| Functionality retained | **full** | | Functionality retained | **full** |
| Command to use | `monolith https://conversejs.org/fullscreen.html > conversejs.html` | | Command to use | `monolith https://conversejs.org/fullscreen.html > conversejs.html` |
| Monolith version used | 2.2.7 | | Monolith version used | 2.2.7 |
## Markdown Tables generator
| Website | https://www.tablesgenerator.com |
|:--------------------------|:-----------------------------------------------------------------------------------------------|
| Description | Tool for creating tables in extended Markdown format |
| Functionality retained | **full** |
| Command to use | `monolith -I https://www.tablesgenerator.com/markdown_tables -o markdown-table-generator.html` |
| Monolith version used | 2.6.1 |

25
monolith.nuspec Normal file
View file

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://schemas.microsoft.com/packaging/2015/06/nuspec.xsd">
<metadata>
<id>monolith</id>
<version>2.4.0</version>
<title>Monolith</title>
<authors>Sunshine, Mahdi Robatipoor, Emmanuel Delaborde, Emi Simpson, rhysd</authors>
<projectUrl>https://github.com/Y2Z/monolith</projectUrl>
<iconUrl>https://raw.githubusercontent.com/Y2Z/monolith/master/assets/icon/icon.png</iconUrl>
<licenseUrl>https://raw.githubusercontent.com/Y2Z/monolith/master/LICENSE</licenseUrl>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>CLI tool for saving complete web pages as a single HTML file
A data hoarder's dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
Unlike the conventional “Save page as”, monolith not only saves the target document, it embeds CSS, image, and JavaScript assets all at once, producing a single HTML5 document that is a joy to store and share.
If compared to saving websites using wget, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
</description>
<copyright>Public Domain</copyright>
<language>en-US</language>
<tags>scraping archiving</tags>
<docsUrl>https://github.com/Y2Z/monolith/blob/master/README.md</docsUrl>
</metadata>
</package>

View file

@ -1,9 +1,12 @@
use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token}; use cssparser::{
serialize_identifier, serialize_string, ParseError, Parser, ParserInput, SourcePosition, Token,
};
use reqwest::blocking::Client; use reqwest::blocking::Client;
use std::collections::HashMap; use std::collections::HashMap;
use url::Url;
use crate::opts::Options; use crate::opts::Options;
use crate::url::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_with_fragment}; use crate::url::{create_data_url, resolve_url, EMPTY_IMAGE_DATA_URL};
use crate::utils::retrieve_asset; use crate::utils::retrieve_asset;
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[ const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
@ -26,7 +29,44 @@ const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
"suffix", "suffix",
"symbols", "symbols",
]; ];
const CSS_SPECIAL_CHARS: &str = "~!@$%^&*()+=,./'\";:?><[]{}|`#";
/// Tokenizes `css` and returns the stylesheet text produced by `process_css`.
///
/// `cache`, `client`, `document_url`, `options` and `depth` are handed to
/// `process_css` unchanged. This is the top-level entry point, so the
/// enclosing rule, property and function names all start out empty.
///
/// # Panics
///
/// Panics if `process_css` returns an error (its result is unwrapped).
pub fn embed_css(
    cache: &mut HashMap<String, Vec<u8>>,
    client: &Client,
    document_url: &Url,
    css: &str,
    options: &Options,
    depth: u32,
) -> String {
    let mut css_input = ParserInput::new(css);
    let mut css_parser = Parser::new(&mut css_input);

    // No at-rule, property or function encloses the top level of a stylesheet
    let (rule_name, prop_name, func_name) = ("", "", "");

    let processed = process_css(
        cache,
        client,
        document_url,
        &mut css_parser,
        options,
        depth,
        rule_name,
        prop_name,
        func_name,
    );

    processed.unwrap()
}
/// Serializes `ident` with cssparser's `serialize_identifier` and returns the
/// result with trailing whitespace removed.
///
/// The serializer's `fmt::Result` is deliberately ignored, exactly as before.
pub fn format_ident(ident: &str) -> String {
    let mut serialized = String::new();
    let _ = serialize_identifier(ident, &mut serialized);

    // Only whitespace at the very end is dropped; the rest is kept verbatim
    serialized.trim_end().to_owned()
}
/// Serializes `string` with cssparser's `serialize_string` and returns the
/// serialized text.
///
/// The serializer's `fmt::Result` is deliberately ignored, exactly as before.
pub fn format_quoted_string(string: &str) -> String {
    let mut quoted = String::new();
    let _ = serialize_string(string, &mut quoted);
    quoted
}
pub fn is_image_url_prop(prop_name: &str) -> bool { pub fn is_image_url_prop(prop_name: &str) -> bool {
CSS_PROPS_WITH_IMAGE_URLS CSS_PROPS_WITH_IMAGE_URLS
@ -35,30 +75,10 @@ pub fn is_image_url_prop(prop_name: &str) -> bool {
.is_some() .is_some()
} }
/// Wraps `input` in quotes, backslash-escaping every occurrence of the chosen
/// quote character inside it.
///
/// Double quotes are used when `double` is `true`, single quotes otherwise.
/// Only the chosen quote character is escaped; everything else (including
/// backslashes and the other kind of quote) is left untouched.
pub fn enquote(input: String, double: bool) -> String {
    let quote: char = if double { '"' } else { '\'' };
    let escaped_quote = format!("\\{}", quote);
    let body = input.replace(quote, &escaped_quote);

    format!("{q}{body}{q}", q = quote, body = body)
}
pub fn escape(value: &str) -> String {
let mut res = str!(&value);
res = res.replace("\\", "\\\\");
for c in CSS_SPECIAL_CHARS.chars() {
res = res.replace(c, format!("\\{}", c).as_str());
}
res
}
pub fn process_css<'a>( pub fn process_css<'a>(
cache: &mut HashMap<String, Vec<u8>>, cache: &mut HashMap<String, Vec<u8>>,
client: &Client, client: &Client,
parent_url: &str, document_url: &Url,
parser: &mut Parser, parser: &mut Parser,
options: &Options, options: &Options,
depth: u32, depth: u32,
@ -66,10 +86,10 @@ pub fn process_css<'a>(
prop_name: &str, prop_name: &str,
func_name: &str, func_name: &str,
) -> Result<String, ParseError<'a, String>> { ) -> Result<String, ParseError<'a, String>> {
let mut result: String = str!(); let mut result: String = "".to_string();
let mut curr_rule: String = str!(rule_name.clone()); let mut curr_rule: String = rule_name.clone().to_string();
let mut curr_prop: String = str!(prop_name.clone()); let mut curr_prop: String = prop_name.clone().to_string();
let mut token: &Token; let mut token: &Token;
let mut token_offset: SourcePosition; let mut token_offset: SourcePosition;
@ -85,7 +105,7 @@ pub fn process_css<'a>(
match *token { match *token {
Token::Comment(_) => { Token::Comment(_) => {
let token_slice = parser.slice_from(token_offset); let token_slice = parser.slice_from(token_offset);
result.push_str(str!(token_slice).as_str()); result.push_str(token_slice);
} }
Token::Semicolon => result.push_str(";"), Token::Semicolon => result.push_str(";"),
Token::Colon => result.push_str(":"), Token::Colon => result.push_str(":"),
@ -112,7 +132,7 @@ pub fn process_css<'a>(
process_css( process_css(
cache, cache,
client, client,
parent_url, document_url,
parser, parser,
options, options,
depth, depth,
@ -141,13 +161,13 @@ pub fn process_css<'a>(
} }
// div... // div...
Token::Ident(ref value) => { Token::Ident(ref value) => {
curr_rule = str!(); curr_rule = "".to_string();
curr_prop = str!(value); curr_prop = value.to_string();
result.push_str(&escape(value)); result.push_str(&format_ident(value));
} }
// @import, @font-face, @charset, @media... // @import, @font-face, @charset, @media...
Token::AtKeyword(ref value) => { Token::AtKeyword(ref value) => {
curr_rule = str!(value); curr_rule = value.to_string();
if options.no_fonts && curr_rule == "font-face" { if options.no_fonts && curr_rule == "font-face" {
continue; continue;
} }
@ -161,27 +181,32 @@ pub fn process_css<'a>(
Token::QuotedString(ref value) => { Token::QuotedString(ref value) => {
if curr_rule == "import" { if curr_rule == "import" {
// Reset current at-rule value // Reset current at-rule value
curr_rule = str!(); curr_rule = "".to_string();
// Skip empty import values // Skip empty import values
if value.len() < 1 { if value.len() == 0 {
result.push_str("''"); result.push_str("''");
continue; continue;
} }
let import_full_url = resolve_url(&parent_url, value).unwrap_or_default(); let import_full_url: Url = resolve_url(&document_url, value);
let import_url_fragment = get_url_fragment(import_full_url.clone());
match retrieve_asset( match retrieve_asset(
cache, cache,
client, client,
&parent_url, &document_url,
&import_full_url, &import_full_url,
options, options,
depth + 1, depth + 1,
) { ) {
Ok((import_contents, import_final_url, _import_media_type)) => { Ok((
let import_data_url = data_to_data_url( import_contents,
"text/css", import_final_url,
import_media_type,
import_charset,
)) => {
let mut import_data_url = create_data_url(
&import_media_type,
&import_charset,
embed_css( embed_css(
cache, cache,
client, client,
@ -193,63 +218,64 @@ pub fn process_css<'a>(
.as_bytes(), .as_bytes(),
&import_final_url, &import_final_url,
); );
let assembled_url: String = url_with_fragment( import_data_url.set_fragment(import_full_url.fragment());
import_data_url.as_str(), result.push_str(
import_url_fragment.as_str(), format_quoted_string(&import_data_url.to_string()).as_str(),
); );
result.push_str(enquote(assembled_url, false).as_str());
} }
Err(_) => { Err(_) => {
// Keep remote reference if unable to retrieve the asset // Keep remote reference if unable to retrieve the asset
if is_http_url(import_full_url.clone()) { if import_full_url.scheme() == "http"
let assembled_url: String = url_with_fragment( || import_full_url.scheme() == "https"
import_full_url.as_str(), {
import_url_fragment.as_str(), result.push_str(
format_quoted_string(&import_full_url.to_string()).as_str(),
); );
result.push_str(enquote(assembled_url, false).as_str());
} }
} }
} }
} else { } else {
if func_name == "url" { if func_name == "url" {
// Skip empty url()'s // Skip empty url()'s
if value.len() < 1 { if value.len() == 0 {
continue; continue;
} }
if options.no_images && is_image_url_prop(curr_prop.as_str()) { if options.no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str()); result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
} else { } else {
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default(); let resolved_url: Url = resolve_url(&document_url, value);
let url_fragment = get_url_fragment(resolved_url.clone());
match retrieve_asset( match retrieve_asset(
cache, cache,
client, client,
&parent_url, &document_url,
&resolved_url, &resolved_url,
options, options,
depth + 1, depth + 1,
) { ) {
Ok((data, final_url, media_type)) => { Ok((data, final_url, media_type, charset)) => {
let data_url = data_to_data_url(&media_type, &data, &final_url); let mut data_url =
let assembled_url: String = create_data_url(&media_type, &charset, &data, &final_url);
url_with_fragment(data_url.as_str(), url_fragment.as_str()); data_url.set_fragment(resolved_url.fragment());
result.push_str(enquote(assembled_url, false).as_str()); result.push_str(
format_quoted_string(&data_url.to_string()).as_str(),
);
} }
Err(_) => { Err(_) => {
// Keep remote reference if unable to retrieve the asset // Keep remote reference if unable to retrieve the asset
if is_http_url(resolved_url.clone()) { if resolved_url.scheme() == "http"
let assembled_url: String = url_with_fragment( || resolved_url.scheme() == "https"
resolved_url.as_str(), {
url_fragment.as_str(), result.push_str(
format_quoted_string(&resolved_url.to_string())
.as_str(),
); );
result.push_str(enquote(assembled_url, false).as_str());
} }
} }
} }
} }
} else { } else {
result.push_str(enquote(str!(value), false).as_str()); result.push_str(format_quoted_string(value).as_str());
} }
} }
} }
@ -271,7 +297,7 @@ pub fn process_css<'a>(
if *has_sign && *unit_value >= 0. { if *has_sign && *unit_value >= 0. {
result.push_str("+"); result.push_str("+");
} }
result.push_str(str!(unit_value * 100.0).as_str()); result.push_str(&(unit_value * 100.0).to_string());
result.push_str("%"); result.push_str("%");
} }
Token::Dimension { Token::Dimension {
@ -283,21 +309,22 @@ pub fn process_css<'a>(
if *has_sign && *value >= 0. { if *has_sign && *value >= 0. {
result.push_str("+"); result.push_str("+");
} }
result.push_str(str!(value).as_str()); result.push_str(&value.to_string());
result.push_str(str!(unit).as_str()); result.push_str(&unit.to_string());
} }
// #selector, #id... // #selector, #id...
Token::IDHash(ref value) => { Token::IDHash(ref value) => {
curr_rule = str!(); curr_rule = "".to_string();
result.push_str("#"); result.push_str("#");
result.push_str(&escape(value)); result.push_str(&format_ident(value));
} }
// url()
Token::UnquotedUrl(ref value) => { Token::UnquotedUrl(ref value) => {
let is_import: bool = curr_rule == "import"; let is_import: bool = curr_rule == "import";
if is_import { if is_import {
// Reset current at-rule value // Reset current at-rule value
curr_rule = str!(); curr_rule = "".to_string();
} }
// Skip empty url()'s // Skip empty url()'s
@ -313,13 +340,19 @@ pub fn process_css<'a>(
result.push_str("url("); result.push_str("url(");
if is_import { if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default(); let full_url: Url = resolve_url(&document_url, value);
let url_fragment = get_url_fragment(full_url.clone()); match retrieve_asset(
match retrieve_asset(cache, client, &parent_url, &full_url, options, depth + 1) cache,
{ client,
Ok((css, final_url, _media_type)) => { &document_url,
let data_url = data_to_data_url( &full_url,
"text/css", options,
depth + 1,
) {
Ok((css, final_url, media_type, charset)) => {
let mut data_url = create_data_url(
&media_type,
&charset,
embed_css( embed_css(
cache, cache,
client, client,
@ -331,45 +364,43 @@ pub fn process_css<'a>(
.as_bytes(), .as_bytes(),
&final_url, &final_url,
); );
let assembled_url: String = data_url.set_fragment(full_url.fragment());
url_with_fragment(data_url.as_str(), url_fragment.as_str()); result.push_str(format_quoted_string(&data_url.to_string()).as_str());
result.push_str(enquote(assembled_url, false).as_str());
} }
Err(_) => { Err(_) => {
// Keep remote reference if unable to retrieve the asset // Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) { if full_url.scheme() == "http" || full_url.scheme() == "https" {
let assembled_url: String = result
url_with_fragment(full_url.as_str(), url_fragment.as_str()); .push_str(format_quoted_string(&full_url.to_string()).as_str());
result.push_str(enquote(assembled_url, false).as_str());
} }
} }
} }
} else { } else {
if is_image_url_prop(curr_prop.as_str()) && options.no_images { if is_image_url_prop(curr_prop.as_str()) && options.no_images {
result.push_str(enquote(str!(empty_image!()), false).as_str()); result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
} else { } else {
let full_url = resolve_url(&parent_url, value).unwrap_or_default(); let full_url: Url = resolve_url(&document_url, value);
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset( match retrieve_asset(
cache, cache,
client, client,
&parent_url, &document_url,
&full_url, &full_url,
options, options,
depth + 1, depth + 1,
) { ) {
Ok((data, final_url, media_type)) => { Ok((data, final_url, media_type, charset)) => {
let data_url = data_to_data_url(&media_type, &data, &final_url); let mut data_url =
let assembled_url: String = create_data_url(&media_type, &charset, &data, &final_url);
url_with_fragment(data_url.as_str(), url_fragment.as_str()); data_url.set_fragment(full_url.fragment());
result.push_str(enquote(assembled_url, false).as_str()); result
.push_str(format_quoted_string(&data_url.to_string()).as_str());
} }
Err(_) => { Err(_) => {
// Keep remote reference if unable to retrieve the asset // Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) { if full_url.scheme() == "http" || full_url.scheme() == "https" {
let assembled_url: String = result.push_str(
url_with_fragment(full_url.as_str(), url_fragment.as_str()); format_quoted_string(&full_url.to_string()).as_str(),
result.push_str(enquote(assembled_url, false).as_str()); );
} }
} }
} }
@ -377,6 +408,7 @@ pub fn process_css<'a>(
} }
result.push_str(")"); result.push_str(")");
} }
// =
Token::Delim(ref value) => result.push_str(&value.to_string()), Token::Delim(ref value) => result.push_str(&value.to_string()),
Token::Function(ref name) => { Token::Function(ref name) => {
let function_name: &str = &name.clone(); let function_name: &str = &name.clone();
@ -388,7 +420,7 @@ pub fn process_css<'a>(
process_css( process_css(
cache, cache,
client, client,
parent_url, document_url,
parser, parser,
options, options,
depth, depth,
@ -413,28 +445,3 @@ pub fn process_css<'a>(
Ok(result) Ok(result)
} }
/// Tokenizes `css` and returns the stylesheet text produced by `process_css`.
///
/// `cache`, `client`, `parent_url`, `options` and `depth` are handed to
/// `process_css` unchanged. This is the top-level entry point, so the
/// enclosing rule, property and function names all start out empty.
///
/// # Panics
///
/// Panics if `process_css` returns an error (its result is unwrapped).
pub fn embed_css(
    cache: &mut HashMap<String, Vec<u8>>,
    client: &Client,
    parent_url: &str,
    css: &str,
    options: &Options,
    depth: u32,
) -> String {
    let mut css_input = ParserInput::new(css);
    let mut css_parser = Parser::new(&mut css_input);

    // No at-rule, property or function encloses the top level of a stylesheet
    let (rule_name, prop_name, func_name) = ("", "", "");

    let processed = process_css(
        cache,
        client,
        parent_url,
        &mut css_parser,
        options,
        depth,
        rule_name,
        prop_name,
        func_name,
    );

    processed.unwrap()
}

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
const JS_DOM_EVENT_ATTRS: &[&str] = &[ const JS_DOM_EVENT_ATTRS: &'static [&str] = &[
// From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects": // From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects":
// https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects // https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
// https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes") // https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes")

View file

@ -1,15 +1,6 @@
#[macro_use]
extern crate clap;
#[macro_use]
mod macros;
pub mod css; pub mod css;
pub mod html; pub mod html;
pub mod js; pub mod js;
pub mod opts; pub mod opts;
pub mod url; pub mod url;
pub mod utils; pub mod utils;
#[cfg(test)]
pub mod tests;

View file

@ -1,17 +0,0 @@
/// Shorthand for building an owned `String`.
///
/// - `str!()` expands to `String::new()` (an empty string).
/// - `str!(expr)` expands to `ToString::to_string(&expr)`, so `expr` is only
///   borrowed and must implement `ToString`.
#[macro_export]
macro_rules! str {
// No argument: empty string
() => {
String::new()
};
// One expression: convert it through its `ToString` implementation
($val: expr) => {
ToString::to_string(&$val)
};
}
/// Expands to a string literal containing a base64-encoded PNG `data:` URL.
///
/// The trailing backslash inside the literal continues the string onto the
/// next line, so the expansion is one uninterrupted URL.
// NOTE(review): judging by the name, the PNG payload is a blank placeholder
// image — the decoded contents are not verifiable from this file; confirm.
#[macro_export]
macro_rules! empty_image {
() => {
"data:image/png;base64,\
iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII="
};
}

View file

@ -1,25 +1,23 @@
use encoding_rs::Encoding;
use html5ever::rcdom::RcDom;
use reqwest::blocking::Client; use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap; use std::collections::HashMap;
use std::env;
use std::fs; use std::fs;
use std::io::{self, prelude::*, Error, Write}; use std::io::{self, prelude::*, Error, Write};
use std::path::Path; use std::path::Path;
use std::process; use std::process;
use std::time::Duration; use std::time::Duration;
use url::Url;
use monolith::html::{ use monolith::html::{
add_favicon, create_metadata_tag, get_base_url, has_favicon, html_to_dom, set_base_url, add_favicon, create_metadata_tag, get_base_url, get_charset, has_favicon, html_to_dom,
stringify_document, walk_and_embed_assets, serialize_document, set_base_url, set_charset, walk_and_embed_assets,
}; };
use monolith::opts::Options; use monolith::opts::Options;
use monolith::url::{ use monolith::url::{create_data_url, resolve_url};
data_to_data_url, is_data_url, is_file_url, is_http_url, parse_data_url, resolve_url,
};
use monolith::utils::retrieve_asset; use monolith::utils::retrieve_asset;
mod macros;
enum Output { enum Output {
Stdout(io::Stdout), Stdout(io::Stdout),
File(fs::File), File(fs::File),
@ -34,115 +32,168 @@ impl Output {
} }
} }
fn writeln_str(&mut self, s: &str) -> Result<(), Error> { fn write(&mut self, bytes: &Vec<u8>) -> Result<(), Error> {
match self { match self {
Output::Stdout(stdout) => { Output::Stdout(stdout) => {
writeln!(stdout, "{}", s)?; stdout.write_all(bytes)?;
// Ensure newline at end of output
if bytes.last() != Some(&b"\n"[0]) {
stdout.write(b"\n")?;
}
stdout.flush() stdout.flush()
} }
Output::File(f) => { Output::File(file) => {
writeln!(f, "{}", s)?; file.write_all(bytes)?;
f.flush() // Ensure newline at end of output
if bytes.last() != Some(&b"\n"[0]) {
file.write(b"\n")?;
}
file.flush()
} }
} }
} }
} }
pub fn read_stdin() -> String { pub fn read_stdin() -> Vec<u8> {
let mut buffer = String::new(); let mut buffer: Vec<u8> = vec![];
for line in io::stdin().lock().lines() {
buffer += line.unwrap_or_default().as_str(); match io::stdin().lock().read_to_end(&mut buffer) {
buffer += "\n"; Ok(_) => buffer,
Err(_) => buffer,
} }
buffer
} }
fn main() { fn main() {
let options = Options::from_args(); let options = Options::from_args();
let original_target: &str = &options.target;
let target_url: &str;
let mut base_url: String;
let mut dom;
let mut use_stdin: bool = false;
// Pre-process the input // Check if target was provided
let cwd_normalized: String = if options.target.len() == 0 {
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let path = Path::new(original_target);
let mut target: String = str!(original_target.clone()).replace("\\", "/");
let path_is_relative: bool = path.is_relative();
// Determine exact target URL
if target.clone().len() == 0 {
if !options.silent { if !options.silent {
eprintln!("No target specified"); eprintln!("No target specified");
} }
process::exit(1); process::exit(1);
} else if target.clone() == "-" { }
// Check if custom charset is valid
if let Some(custom_charset) = options.charset.clone() {
if !Encoding::for_label_no_replacement(custom_charset.as_bytes()).is_some() {
eprintln!("Unknown encoding: {}", &custom_charset);
process::exit(1);
}
}
let mut use_stdin: bool = false;
let target_url = match options.target.as_str() {
"-" => {
// Read from pipe (stdin) // Read from pipe (stdin)
use_stdin = true; use_stdin = true;
// Default target URL to empty data URL; the user can control it via --base-url // Set default target URL to an empty data URL; the user can set it via --base-url
target_url = "data:text/html," Url::parse("data:text/html,").unwrap()
} else if is_http_url(target.clone()) || is_data_url(target.clone()) { }
target_url = target.as_str(); target => match Url::parse(&target) {
} else if is_file_url(target.clone()) { Ok(url) => match url.scheme() {
target_url = target.as_str(); "data" | "file" | "http" | "https" => url,
} else if path.exists() { unsupported_scheme => {
if !path.is_file() {
if !options.silent { if !options.silent {
eprintln!("Local target is not a file: {}", original_target); eprintln!("Unsupported target URL type: {}", unsupported_scheme);
}
process::exit(1)
}
},
Err(_) => {
// Failed to parse given base URL (perhaps it's a filesystem path?)
let path: &Path = Path::new(&target);
match path.exists() {
true => match path.is_file() {
true => {
let canonical_path = fs::canonicalize(&path).unwrap();
match Url::from_file_path(canonical_path) {
Ok(url) => url,
Err(_) => {
if !options.silent {
eprintln!(
"Could not generate file URL out of given path: {}",
&target
);
} }
process::exit(1); process::exit(1);
} }
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
if path_is_relative {
target.insert_str(if cfg!(windows) { 8 } else { 7 }, &cwd_normalized);
target.insert_str(
if cfg!(windows) { 8 } else { 7 } + &cwd_normalized.len(),
"/",
);
} }
target_url = target.as_str();
} else {
target.insert_str(0, "http://");
target_url = target.as_str();
} }
false => {
// Define output if !options.silent {
let mut output = Output::new(&options.output).expect("Could not prepare output"); eprintln!("Local target is not a file: {}", &target);
}
process::exit(1);
}
},
false => {
// It is not a FS path, now we do what browsers do:
// prepend "http://" and hope it points to a website
Url::parse(&format!("http://{hopefully_url}", hopefully_url = &target))
.unwrap()
}
}
}
},
};
// Initialize client // Initialize client
let mut cache = HashMap::new(); let mut cache = HashMap::new();
let mut header_map = HeaderMap::new(); let mut header_map = HeaderMap::new();
if let Some(user_agent) = &options.user_agent {
header_map.insert( header_map.insert(
USER_AGENT, USER_AGENT,
HeaderValue::from_str(&options.user_agent).expect("Invalid User-Agent header specified"), HeaderValue::from_str(&user_agent).expect("Invalid User-Agent header specified"),
); );
let timeout: u64 = if options.timeout > 0 { }
options.timeout let client = if options.timeout > 0 {
Client::builder().timeout(Duration::from_secs(options.timeout))
} else { } else {
std::u64::MAX / 4 // No timeout is default
}; Client::builder()
let client = Client::builder() }
.timeout(Duration::from_secs(timeout))
.danger_accept_invalid_certs(options.insecure) .danger_accept_invalid_certs(options.insecure)
.default_headers(header_map) .default_headers(header_map)
.build() .build()
.expect("Failed to initialize HTTP client"); .expect("Failed to initialize HTTP client");
// At this stage we assume that the base URL is the same as the target URL // At first we assume that base URL is the same as target URL
base_url = str!(target_url); let mut base_url: Url = target_url.clone();
let data: Vec<u8>;
let mut document_encoding: String = "".to_string();
let mut dom: RcDom;
// Retrieve target document // Retrieve target document
if use_stdin { if use_stdin {
dom = html_to_dom(&read_stdin()); data = read_stdin();
} else if is_file_url(target_url) || is_http_url(target_url) { } else if target_url.scheme() == "file"
match retrieve_asset(&mut cache, &client, target_url, target_url, &options, 0) { || (target_url.scheme() == "http" || target_url.scheme() == "https")
Ok((data, final_url, _media_type)) => { || target_url.scheme() == "data"
if options.base_url.clone().unwrap_or(str!()).is_empty() { {
base_url = final_url match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options, 0) {
Ok((retrieved_data, final_url, media_type, charset)) => {
// Make sure the media type is text/html
if !media_type.eq_ignore_ascii_case("text/html") {
if !options.silent {
eprintln!("Unsupported document media type");
} }
dom = html_to_dom(&String::from_utf8_lossy(&data)); process::exit(1);
}
if options
.base_url
.clone()
.unwrap_or("".to_string())
.is_empty()
{
base_url = final_url;
}
data = retrieved_data;
document_encoding = charset;
} }
Err(_) => { Err(_) => {
if !options.silent { if !options.silent {
@ -151,79 +202,128 @@ fn main() {
process::exit(1); process::exit(1);
} }
} }
} else if is_data_url(target_url) {
let (media_type, data): (String, Vec<u8>) = parse_data_url(target_url);
if !media_type.eq_ignore_ascii_case("text/html") {
if !options.silent {
eprintln!("Unsupported data URL media type");
}
process::exit(1);
}
dom = html_to_dom(&String::from_utf8_lossy(&data));
} else { } else {
process::exit(1); process::exit(1);
} }
// Initial parse
dom = html_to_dom(&data, document_encoding.clone());
// TODO: investigate if charset from filesystem/data URL/HTTP headers
// has say over what's specified in HTML
// Attempt to determine document's charset
if let Some(html_charset) = get_charset(&dom.document) {
if !html_charset.is_empty() {
// Check if the charset specified inside HTML is valid
if let Some(encoding) = Encoding::for_label_no_replacement(html_charset.as_bytes()) {
document_encoding = html_charset;
dom = html_to_dom(&data, encoding.name().to_string());
}
}
}
// Use custom base URL if specified, read and use what's in the DOM otherwise // Use custom base URL if specified, read and use what's in the DOM otherwise
if !options.base_url.clone().unwrap_or(str!()).is_empty() { let custom_base_url: String = options.base_url.clone().unwrap_or("".to_string());
if is_data_url(options.base_url.clone().unwrap()) { if custom_base_url.is_empty() {
// No custom base URL is specified
// Try to see if document has BASE element
if let Some(existing_base_url) = get_base_url(&dom.document) {
base_url = resolve_url(&target_url, &existing_base_url);
}
} else {
// Custom base URL provided
match Url::parse(&custom_base_url) {
Ok(parsed_url) => {
if parsed_url.scheme() == "file" {
// File base URLs can only work with
// documents saved from filesystem
if target_url.scheme() == "file" {
base_url = parsed_url;
}
} else {
base_url = parsed_url;
}
}
Err(_) => {
// Failed to parse given base URL, perhaps it's a filesystem path?
if target_url.scheme() == "file" {
// Relative paths could work for documents saved from filesystem
let path: &Path = Path::new(&custom_base_url);
if path.exists() {
match Url::from_file_path(fs::canonicalize(&path).unwrap()) {
Ok(file_url) => {
base_url = file_url;
}
Err(_) => {
if !options.silent { if !options.silent {
eprintln!("Data URLs cannot be used as base URL"); eprintln!(
"Could not map given path to base URL: {}",
custom_base_url
);
} }
process::exit(1); process::exit(1);
} else {
base_url = options.base_url.clone().unwrap();
} }
} else { }
if let Some(existing_base_url) = get_base_url(&dom.document) { }
base_url = resolve_url(target_url, existing_base_url).unwrap(); }
}
} }
} }
// Embed remote assets // Traverse through the document and embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0); walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
// Update or add new BASE tag to reroute network requests and hash-links in the final document // Update or add new BASE element to reroute network requests and hash-links
if let Some(new_base_url) = options.base_url.clone() { if let Some(new_base_url) = options.base_url.clone() {
dom = set_base_url(&dom.document, new_base_url); dom = set_base_url(&dom.document, new_base_url);
} }
// Request and embed /favicon.ico (unless it's already linked in the document) // Request and embed /favicon.ico (unless it's already linked in the document)
if !options.no_images && is_http_url(target_url) && !has_favicon(&dom.document) { if !options.no_images
let favicon_ico_url: String = resolve_url(&base_url, "/favicon.ico").unwrap(); && (target_url.scheme() == "http" || target_url.scheme() == "https")
&& !has_favicon(&dom.document)
{
let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico");
match retrieve_asset( match retrieve_asset(
&mut cache, &mut cache,
&client, &client,
&base_url, &target_url,
&favicon_ico_url, &favicon_ico_url,
&options, &options,
0, 0,
) { ) {
Ok((data, final_url, media_type)) => { Ok((data, final_url, media_type, charset)) => {
let favicon_data_url: String = data_to_data_url(&media_type, &data, &final_url); let favicon_data_url: Url =
dom = add_favicon(&dom.document, favicon_data_url); create_data_url(&media_type, &charset, &data, &final_url);
dom = add_favicon(&dom.document, favicon_data_url.to_string());
} }
Err(_) => { Err(_) => {
// Failed to retrieve favicon.ico // Failed to retrieve /favicon.ico
} }
} }
} }
// Save using specified charset, if given
if let Some(custom_charset) = options.charset.clone() {
document_encoding = custom_charset;
dom = set_charset(dom, document_encoding.clone());
}
// Serialize DOM tree // Serialize DOM tree
let mut result: String = stringify_document(&dom.document, &options); let mut result: Vec<u8> = serialize_document(dom, document_encoding, &options);
// Add metadata tag // Prepend metadata comment tag
if !options.no_metadata { if !options.no_metadata {
let metadata_comment: String = create_metadata_tag(&target_url); let mut metadata_comment: String = create_metadata_tag(&target_url);
result.insert_str(0, &metadata_comment); metadata_comment += "\n";
if metadata_comment.len() > 0 { result.splice(0..0, metadata_comment.as_bytes().to_vec());
result.insert_str(metadata_comment.len(), "\n");
}
} }
// Define output
let mut output = Output::new(&options.output).expect("Could not prepare output");
// Write result into stdout or file // Write result into stdout or file
output output.write(&result).expect("Could not write HTML output");
.writeln_str(&result)
.expect("Could not write HTML output");
} }

View file

@ -1,11 +1,15 @@
use clap::{App, Arg}; use clap::{App, Arg, ArgAction};
use std::env;
#[derive(Default)] #[derive(Default)]
pub struct Options { pub struct Options {
pub no_audio: bool, pub no_audio: bool,
pub base_url: Option<String>, pub base_url: Option<String>,
pub no_css: bool, pub no_css: bool,
pub charset: Option<String>,
pub domains: Option<Vec<String>>,
pub ignore_errors: bool, pub ignore_errors: bool,
pub exclude_domains: bool,
pub no_frames: bool, pub no_frames: bool,
pub no_fonts: bool, pub no_fonts: bool,
pub no_images: bool, pub no_images: bool,
@ -16,12 +20,14 @@ pub struct Options {
pub output: String, pub output: String,
pub silent: bool, pub silent: bool,
pub timeout: u64, pub timeout: u64,
pub user_agent: String, pub user_agent: Option<String>,
pub no_video: bool, pub no_video: bool,
pub target: String, pub target: String,
pub no_color: bool,
pub unwrap_noscript: bool,
} }
const ASCII: &str = " \ const ASCII: &'static str = " \
_____ ______________ __________ ___________________ ___ _____ ______________ __________ ___________________ ___
| \\ / \\ | | | | | | | \\ / \\ | | | | | |
| \\_/ __ \\_| __ | | ___ ___ |__| | | \\_/ __ \\_| __ | | ___ ___ |__| |
@ -31,19 +37,32 @@ const ASCII: &str = " \
|___| |__________| \\_____________________| |___| |___| |___| |___| |__________| \\_____________________| |___| |___| |___|
"; ";
const DEFAULT_NETWORK_TIMEOUT: u64 = 120; const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
const DEFAULT_USER_AGENT: &str = const DEFAULT_USER_AGENT: &'static str =
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0"; "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
const ENV_VAR_NO_COLOR: &str = "NO_COLOR";
const ENV_VAR_TERM: &str = "TERM";
impl Options { impl Options {
pub fn from_args() -> Options { pub fn from_args() -> Options {
let app = App::new(env!("CARGO_PKG_NAME")) let app = App::new(env!("CARGO_PKG_NAME"))
.version(crate_version!()) .version(env!("CARGO_PKG_VERSION"))
.author(crate_authors!("\n")) .author(format!("\n{}\n\n", env!("CARGO_PKG_AUTHORS").replace(':', "\n")).as_str())
.about(format!("{}\n{}", ASCII, crate_description!()).as_str()) .about(format!("{}\n{}", ASCII, env!("CARGO_PKG_DESCRIPTION")).as_str())
.args_from_usage("-a, --no-audio 'Removes audio sources'") .args_from_usage("-a, --no-audio 'Removes audio sources'")
.args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'") .args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
.args_from_usage("-c, --no-css 'Removes CSS'") .args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-C, --charset=[UTF-8] 'Enforces custom encoding'")
.arg(
Arg::with_name("domains")
.short('d')
.long("domains")
.takes_value(true)
.value_name("DOMAINS")
.action(ArgAction::Append)
.help("Whitelist of domains"),
)
.args_from_usage("-e, --ignore-errors 'Ignore network errors'") .args_from_usage("-e, --ignore-errors 'Ignore network errors'")
.args_from_usage("-E, --exclude-domains 'Treat specified domains as blacklist'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'") .args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-F, --no-fonts 'Removes fonts'") .args_from_usage("-F, --no-fonts 'Removes fonts'")
.args_from_usage("-i, --no-images 'Removes images'") .args_from_usage("-i, --no-images 'Removes images'")
@ -51,7 +70,12 @@ impl Options {
.args_from_usage("-j, --no-js 'Removes JavaScript'") .args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'") .args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'") .args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'") .args_from_usage(
"-n, --unwrap-noscript 'Replaces NOSCRIPT elements with their contents'",
)
.args_from_usage(
"-o, --output=[document.html] 'Writes output to <file>, use - for STDOUT'",
)
.args_from_usage("-s, --silent 'Suppresses verbosity'") .args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'") .args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'") .args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
@ -61,7 +85,7 @@ impl Options {
.required(true) .required(true)
.takes_value(true) .takes_value(true)
.index(1) .index(1)
.help("URL or file path"), .help("URL or file path, use - for STDIN"),
) )
.get_matches(); .get_matches();
let mut options: Options = Options::default(); let mut options: Options = Options::default();
@ -73,10 +97,18 @@ impl Options {
.to_string(); .to_string();
options.no_audio = app.is_present("no-audio"); options.no_audio = app.is_present("no-audio");
if let Some(base_url) = app.value_of("base-url") { if let Some(base_url) = app.value_of("base-url") {
options.base_url = Some(str!(base_url)); options.base_url = Some(base_url.to_string());
} }
options.no_css = app.is_present("no-css"); options.no_css = app.is_present("no-css");
if let Some(charset) = app.value_of("charset") {
options.charset = Some(charset.to_string());
}
if let Some(domains) = app.get_many::<String>("domains") {
let list_of_domains: Vec<String> = domains.map(|v| v.clone()).collect::<Vec<_>>();
options.domains = Some(list_of_domains);
}
options.ignore_errors = app.is_present("ignore-errors"); options.ignore_errors = app.is_present("ignore-errors");
options.exclude_domains = app.is_present("exclude-domains");
options.no_frames = app.is_present("no-frames"); options.no_frames = app.is_present("no-frames");
options.no_fonts = app.is_present("no-fonts"); options.no_fonts = app.is_present("no-fonts");
options.no_images = app.is_present("no-images"); options.no_images = app.is_present("no-images");
@ -91,12 +123,22 @@ impl Options {
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string()) .unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
.parse::<u64>() .parse::<u64>()
.unwrap(); .unwrap();
options.user_agent = app if let Some(user_agent) = app.value_of("user-agent") {
.value_of("user-agent") options.user_agent = Some(user_agent.to_string());
.unwrap_or(DEFAULT_USER_AGENT) } else {
.to_string(); options.user_agent = Some(DEFAULT_USER_AGENT.to_string());
}
options.unwrap_noscript = app.is_present("unwrap-noscript");
options.no_video = app.is_present("no-video"); options.no_video = app.is_present("no-video");
options.no_color =
env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr);
if let Some(term) = env::var_os(ENV_VAR_TERM) {
if term == "dumb" {
options.no_color = true;
}
}
options options
} }
} }

View file

@ -1,559 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::io::Write;
use std::process::Command;
use tempfile::NamedTempFile;
// `-V` must print "<package name> <package version>" and exit cleanly.
#[test]
fn print_version() -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let output = monolith.arg("-V").output().unwrap();

    // The version banner is expected on STDOUT
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let expected_banner = format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"));
    assert_eq!(stdout, expected_banner);

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // Exit status has to be 0
    output.assert().code(0);

    Ok(())
}
// An empty string passed as the target must be rejected with an error message.
#[test]
fn bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let output = monolith.arg("").output().unwrap();

    // No document is produced on STDOUT
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, "");

    // STDERR explains what went wrong
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "No target specified\n");

    // Exit status has to be 1
    output.assert().code(1);

    Ok(())
}
// A data URL without the text/html media type must be refused as a target.
#[test]
fn bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let output = monolith.arg("data:,Hello%2C%20World!").output().unwrap();

    // STDOUT stays empty: no document is emitted for an unsupported media type
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, "");

    // STDERR names the problem
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "Unsupported data URL media type\n");

    // Exit status has to be 1
    output.assert().code(1);

    Ok(())
}
// With `-I`, the saved document is expected to carry a restrictive
// Content-Security-Policy META element (`-M` leaves the metadata comment out).
#[test]
fn isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let output = monolith
        .args(&["-M", "-I", "data:text/html,Hello%2C%20World!"])
        .output()
        .unwrap();

    // STDOUT holds the isolated HTML
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
        </head><body>Hello, World!</body></html>\n"
    );

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // Exit status has to be 0
    output.assert().code(0);

    Ok(())
}
// With `-c` (no CSS), STYLE contents are dropped and a `style-src 'none'`
// policy is added to the document.
#[test]
fn remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let output = monolith
        .args(&[
            "-M",
            "-c",
            "data:text/html,<style>body{background-color:pink}</style>Hello",
        ])
        .output()
        .unwrap();

    // STDOUT holds HTML whose STYLE element has been emptied
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
        <style></style>\
        </head><body>Hello</body></html>\n"
    );

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // Exit status has to be 0
    output.assert().code(0);

    Ok(())
}
// With `-F` (no fonts), @font-face rules are removed and a `font-src 'none'`
// policy is added to the document.
#[test]
fn remove_fonts_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let output = monolith
        .args(&[
            "-M",
            "-F",
            "data:text/html,<style>@font-face { font-family: myFont; src: url(font.woff); }</style>Hi",
        ])
        .output()
        .unwrap();

    // STDOUT holds HTML free of web fonts
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"font-src 'none';\"></meta>\
        <style></style>\
        </head><body>Hi</body></html>\n"
    );

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // Exit status has to be 0
    output.assert().code(0);

    Ok(())
}
// With `-f` (no frames), the IFRAME source is blanked and frame/child sources
// are forbidden through the injected policy.
#[test]
fn remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let output = monolith
        .args(&[
            "-M",
            "-f",
            "data:text/html,<iframe src=\"https://duckduckgo.com\"></iframe>Hi",
        ])
        .output()
        .unwrap();

    // STDOUT holds HTML whose IFRAME no longer points anywhere
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
        </head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
    );

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // Exit status has to be 0
    output.assert().code(0);

    Ok(())
}
// With `-i` (no images), IMG sources are swapped for the placeholder produced
// by `empty_image!()` and an `img-src data:` policy is added.
#[test]
fn remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let output = monolith
        .args(&[
            "-M",
            "-i",
            "data:text/html,<img src=\"https://google.com\"/>Hi",
        ])
        .output()
        .unwrap();

    // STDOUT holds HTML where the remote image has been replaced
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let expected_html = format!(
        "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\
        </head>\
        <body>\
        <img src=\"{empty_image}\">\
        Hi\
        </body>\
        </html>\n",
        empty_image = empty_image!()
    );
    assert_eq!(stdout, expected_html);

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // Exit status has to be 0
    output.assert().code(0);

    Ok(())
}
// With `-j` (no JS), SCRIPT contents are dropped and a `script-src 'none'`
// policy is added to the document.
#[test]
fn remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let output = monolith
        .args(&["-M", "-j", "data:text/html,<script>alert(2)</script>Hi"])
        .output()
        .unwrap();

    // STDOUT holds HTML whose SCRIPT element has been emptied
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
        <script></script></head>\
        <body>Hi</body>\
        </html>\n"
    );

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // Exit status has to be 0
    output.assert().code(0);

    Ok(())
}
// A relative filesystem path is accepted as the target; stylesheet and script
// living next to the document get embedded as data URLs.
#[test]
fn local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    // Working directory with forward slashes, the way it shows up in file URLs
    let cwd_with_slashes: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
    let relative_target: &str = if cfg!(windows) {
        "src\\tests\\data\\basic\\local-file.html"
    } else {
        "src/tests/data/basic/local-file.html"
    };
    let output = monolith.args(&["-M", relative_target]).output().unwrap();
    let url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDOUT holds the document read from disk, with local assets inlined
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link rel=\"stylesheet\" type=\"text/css\" href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\">\n \
        <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
        </body></html>\n\
        "
    );

    // STDERR lists every file URL that was retrieved
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    let expected_log = format!(
        "\
        {file}{cwd}/src/tests/data/basic/local-file.html\n \
        {file}{cwd}/src/tests/data/basic/local-style.css\n \
        {file}{cwd}/src/tests/data/basic/local-script.js\n\
        ",
        file = url_prefix,
        cwd = cwd_with_slashes
    );
    assert_eq!(stderr, expected_log);

    // Exit status has to be 0
    output.assert().code(0);

    Ok(())
}
// An absolute filesystem path is accepted as the target. With `-jciI` no
// assets are embedded, so only the document itself shows up in the log.
#[test]
fn local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>> {
    // Query the working directory once; both the target path and the
    // expected file URL are derived from this single value
    let cwd = env::current_dir().unwrap();
    let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");

    // Path::join inserts the platform's separator, so no per-OS branches
    // (and no lossy string conversion of the path) are required
    let target_path = cwd
        .join("src")
        .join("tests")
        .join("data")
        .join("basic")
        .join("local-file.html");

    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let out = cmd
        .arg("-M")
        .arg("-jciI")
        .arg(&target_path)
        .output()
        .unwrap();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDOUT should contain HTML from the local file
    assert_eq!(
        std::str::from_utf8(&out.stdout).unwrap(),
        format!(
            "\
            <!DOCTYPE html><html lang=\"en\"><head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
            <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
            <title>Local HTML file</title>\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
            <img src=\"{empty_image}\" alt=\"\">\n \
            <a href=\"file://local-file.html/\">Tricky href</a>\n \
            <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
            <script></script>\n\n\n\n\
            </body></html>\n\
            ",
            empty_image = empty_image!()
        )
    );

    // STDERR should contain only the target file
    assert_eq!(
        std::str::from_utf8(&out.stderr).unwrap(),
        format!(
            "{file}{cwd}/src/tests/data/basic/local-file.html\n",
            file = file_url_protocol,
            cwd = cwd_normalized,
        )
    );

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
// A `file://` URL is accepted as the target. With `-cji` no assets are
// embedded, so only the document itself shows up in the log.
#[test]
fn local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
    // The only platform-dependent part of the URL is the scheme prefix
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // Build the target URL once; the same URL is expected back in the log
    let target_url: String = format!(
        "{file}{cwd}/src/tests/data/basic/local-file.html",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );

    let out = cmd
        .arg("-M")
        .arg("-cji")
        .arg(&target_url)
        .output()
        .unwrap();

    // STDOUT should contain HTML from the local file
    assert_eq!(
        std::str::from_utf8(&out.stdout).unwrap(),
        format!(
            "\
            <!DOCTYPE html><html lang=\"en\"><head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
            <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
            <title>Local HTML file</title>\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
            <img src=\"{empty_image}\" alt=\"\">\n \
            <a href=\"file://local-file.html/\">Tricky href</a>\n \
            <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
            <script></script>\n\n\n\n\
            </body></html>\n\
            ",
            empty_image = empty_image!()
        )
    );

    // STDERR should contain the retrieved file URL, i.e. the target itself
    assert_eq!(
        std::str::from_utf8(&out.stderr).unwrap(),
        format!("{}\n", target_url)
    );

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
// A document supplied as a data URL must not be able to pull in files from
// the local filesystem: the relative SCRIPT source is expected to be dropped.
#[test]
fn security_disallow_local_assets_within_data_url_targets(
) -> Result<(), Box<dyn std::error::Error>> {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let output = monolith
        .args(&[
            "-M",
            "data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E",
        ])
        .output()
        .unwrap();

    // STDOUT holds HTML with an empty SCRIPT element
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head><script></script></head><body></body></html>\n"
    );

    // Nothing may be written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // Exit status has to be 0
    output.assert().code(0);

    Ok(())
}
// A file URL referenced from an inline `style` attribute of a local document
// is expected to be embedded as a data URL.
#[test]
fn embed_file_url_local_asset_within_style_attribute() -> Result<(), Box<dyn std::error::Error>>
{
    let url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    // Temporary SVG image that the document refers to
    let mut svg_file = NamedTempFile::new()?;
    writeln!(svg_file, "<svg version=\"1.1\" baseProfile=\"full\" width=\"300\" height=\"200\" xmlns=\"http://www.w3.org/2000/svg\">\
        <rect width=\"100%\" height=\"100%\" fill=\"red\" />\
        <circle cx=\"150\" cy=\"100\" r=\"80\" fill=\"green\" />\
        <text x=\"150\" y=\"125\" font-size=\"60\" text-anchor=\"middle\" fill=\"white\">SVG</text>\
        </svg>\n")?;

    // Temporary HTML document using the image as a CSS background
    let mut html_file = NamedTempFile::new()?;
    writeln!(
        html_file,
        "<div style='background-image: url(\"{file}{path}\")'></div>\n",
        file = url_prefix,
        path = str!(svg_file.path().to_str().unwrap()).replace("\\", "/"),
    )?;

    let output = monolith.arg("-M").arg(html_file.path()).output().unwrap();

    // STDOUT holds HTML whose background-image is now a data URL
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head></head><body><div style=\"background-image: url('data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSJyZWQiIC8+PGNpcmNsZSBjeD0iMTUwIiBjeT0iMTAwIiByPSI4MCIgZmlsbD0iZ3JlZW4iIC8+PHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+PC9zdmc+Cgo=')\"></div>\n\n</body></html>\n"
    );

    // STDERR lists both temporary files as retrieved
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    let expected_log = format!(
        "\
        {file}{html_path}\n \
        {file}{svg_path}\n\
        ",
        file = url_prefix,
        html_path = str!(html_file.path().to_str().unwrap()).replace("\\", "/"),
        svg_path = str!(svg_file.path().to_str().unwrap()).replace("\\", "/"),
    );
    assert_eq!(stderr, expected_log);

    // Exit status has to be 0
    output.assert().code(0);

    Ok(())
}
// All three spellings of @import (bare string, url(), quoted url()) that
// point at a local stylesheet are expected to be embedded as data URLs.
#[test]
fn css_import_string() -> Result<(), Box<dyn std::error::Error>> {
    let url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    // Temporary stylesheet that gets imported
    let mut css_file = NamedTempFile::new()?;
    writeln!(css_file, "body{{background-color:#000;color:#fff}}")?;

    // Temporary HTML document importing the stylesheet three times
    let mut html_file = NamedTempFile::new()?;
    writeln!(
        html_file,
        "\
        <style>\n\
        @charset 'UTF-8';\n\
        \n\
        @import '{file}{css_path}';\n\
        \n\
        @import url({file}{css_path});\n\
        \n\
        @import url('{file}{css_path}')\n\
        </style>\n\
        ",
        file = url_prefix,
        css_path = str!(css_file.path().to_str().unwrap()).replace("\\", "/"),
    )?;

    let output = monolith.arg("-M").arg(html_file.path()).output().unwrap();

    // STDOUT holds HTML where every import became a data URL
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head><style>\n@charset 'UTF-8';\n\n@import 'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K';\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K');\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K')\n</style>\n\n</head><body></body></html>\n"
    );

    // STDERR lists the document once and the stylesheet once per import
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    let expected_log = format!(
        "\
        {file}{html_path}\n \
        {file}{css_path}\n \
        {file}{css_path}\n \
        {file}{css_path}\n\
        ",
        file = url_prefix,
        html_path = str!(html_file.path().to_str().unwrap()).replace("\\", "/"),
        css_path = str!(css_file.path().to_str().unwrap()).replace("\\", "/"),
    );
    assert_eq!(stderr, expected_log);

    // Exit status has to be 0
    output.assert().code(0);

    Ok(())
}
}

View file

@ -1,2 +0,0 @@
mod base_url;
mod basic;

View file

@ -1,53 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
// Expectations for `css::enquote`: the second argument selects double quotes
// (`true`) or single quotes (`false`); per the cases below, only the quote
// character used for wrapping gets backslash-escaped inside the text.
#[cfg(test)]
mod passing {
    use crate::css::enquote;

    #[test]
    fn empty_input_single_quotes() {
        let quoted = enquote(str!(""), false);
        assert_eq!(quoted, "''");
    }

    #[test]
    fn empty_input_double_quotes() {
        let quoted = enquote(str!(""), true);
        assert_eq!(quoted, "\"\"");
    }

    #[test]
    fn apostrophes_single_quotes() {
        let quoted = enquote(str!("It's a lovely day, don't you think?"), false);
        assert_eq!(quoted, "'It\\'s a lovely day, don\\'t you think?'");
    }

    #[test]
    fn apostrophes_double_quotes() {
        let quoted = enquote(str!("It's a lovely day, don't you think?"), true);
        assert_eq!(quoted, "\"It's a lovely day, don't you think?\"");
    }

    #[test]
    fn feet_and_inches_single_quotes() {
        let quoted = enquote(str!("5'2\", 6'5\""), false);
        assert_eq!(quoted, "'5\\'2\", 6\\'5\"'");
    }

    #[test]
    fn feet_and_inches_double_quotes() {
        let quoted = enquote(str!("5'2\", 6'5\""), true);
        assert_eq!(quoted, "\"5'2\\\", 6'5\\\"\"");
    }
}

View file

@ -1,329 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use html5ever::serialize::{serialize, SerializeOpts};
    use reqwest::blocking::Client;
    use std::collections::HashMap;

    use crate::html;
    use crate::opts::Options;

    // Shared test driver: parses `markup` into a DOM, walks it with
    // `html::walk_and_embed_assets` (base URL "http://localhost", empty cache,
    // fresh client, depth 0) and returns the serialized document.
    fn embed_and_serialize(markup: &str, options: &Options) -> String {
        let dom = html::html_to_dom(&markup);
        let url = "http://localhost";
        let cache = &mut HashMap::new();
        let client = Client::new();

        html::walk_and_embed_assets(cache, &client, &url, &dom.document, options, 0);

        let mut buf: Vec<u8> = Vec::new();
        serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

        // Each byte is turned into a char on its own (no UTF-8 decoding takes place)
        buf.iter().map(|&c| c as char).collect::<String>()
    }

    // Default options with nothing but `silent` switched on
    fn silent_options() -> Options {
        let mut options = Options::default();
        options.silent = true;
        options
    }

    // Options used by tests that switch off CSS, frames, JS and images all at once
    fn strip_everything_options() -> Options {
        let mut options = silent_options();
        options.no_css = true;
        options.no_frames = true;
        options.no_js = true;
        options.no_images = true;
        options
    }

    #[test]
    fn basic() {
        let output = embed_and_serialize("<div><P></P></div>", &silent_options());

        assert_eq!(
            output,
            "<html><head></head><body><div><p></p></div></body></html>"
        );
    }

    #[test]
    fn ensure_no_recursive_iframe() {
        let output = embed_and_serialize(
            "<div><P></P><iframe src=\"\"></iframe></div>",
            &silent_options(),
        );

        assert_eq!(
            output,
            "<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
        );
    }

    #[test]
    fn ensure_no_recursive_frame() {
        let output = embed_and_serialize(
            "<frameset><frame src=\"\"></frameset>",
            &silent_options(),
        );

        assert_eq!(
            output,
            "<html><head></head><frameset><frame src=\"\"></frameset></html>"
        );
    }

    #[test]
    fn no_css() {
        let markup = "<link rel=\"stylesheet\" href=\"main.css\">\
                      <style>html{background-color: #000;}</style>\
                      <div style=\"display: none;\"></div>";
        let mut options = silent_options();
        options.no_css = true;

        let output = embed_and_serialize(markup, &options);

        assert_eq!(
            output,
            "<html>\
                <head>\
                    <link rel=\"stylesheet\">\
                    <style></style>\
                </head>\
                <body>\
                    <div></div>\
                </body>\
            </html>"
        );
    }

    #[test]
    fn no_images() {
        let markup = "<link rel=\"icon\" href=\"favicon.ico\">\
                      <div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
        let mut options = silent_options();
        options.no_images = true;

        let output = embed_and_serialize(markup, &options);

        // The image source is expected to be swapped for the empty image placeholder
        let expected = format!(
            "<html>\
                <head>\
                    <link rel=\"icon\">\
                </head>\
                <body>\
                    <div>\
                        <img src=\"{empty_image}\">\
                    </div>\
                </body>\
            </html>",
            empty_image = empty_image!()
        );
        assert_eq!(output, expected);
    }

    #[test]
    fn no_body_background_images() {
        let markup =
            "<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
        let mut options = silent_options();
        options.no_images = true;

        let output = embed_and_serialize(markup, &options);

        assert_eq!(output, "<html><head></head><body></body></html>");
    }

    #[test]
    fn no_frames() {
        let markup = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
        let mut options = silent_options();
        options.no_frames = true;

        let output = embed_and_serialize(markup, &options);

        assert_eq!(
            output,
            "<html><head></head><frameset><frame src=\"\"></frameset></html>"
        );
    }

    #[test]
    fn no_iframes() {
        let markup = "<iframe src=\"http://trackbook.com\"></iframe>";
        let mut options = silent_options();
        options.no_frames = true;

        let output = embed_and_serialize(markup, &options);

        assert_eq!(
            output,
            "<html><head></head><body><iframe src=\"\"></iframe></body></html>"
        );
    }

    #[test]
    fn no_js() {
        let markup = "<div onClick=\"void(0)\">\
                          <script src=\"http://localhost/assets/some.js\"></script>\
                          <script>alert(1)</script>\
                      </div>";
        let mut options = silent_options();
        options.no_js = true;

        let output = embed_and_serialize(markup, &options);

        assert_eq!(
            output,
            "<html><head></head><body><div><script></script>\
             <script></script></div></body></html>"
        );
    }

    #[test]
    fn discards_integrity() {
        let markup = "<title>No integrity</title>\
                      <link integrity=\"sha384-...\" rel=\"something\"/>\
                      <script integrity=\"sha384-...\" src=\"some.js\"></script>";

        let output = embed_and_serialize(markup, &strip_everything_options());

        assert_eq!(
            output,
            "<html>\
                <head><title>No integrity</title><link rel=\"something\"><script></script></head>\
                <body></body>\
            </html>"
        );
    }

    #[test]
    fn removes_unwanted_meta_tags() {
        let markup = "<html>\
                          <head>\
                              <meta http-equiv=\"Refresh\" value=\"20\"/>\
                              <meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
                          </head>\
                          <body></body>\
                      </html>";

        let output = embed_and_serialize(markup, &strip_everything_options());

        assert_eq!(
            output,
            "<html>\
                <head>\
                    <meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
                    <meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
                </head>\
                <body></body>\
            </html>"
        );
    }
}

View file

@ -1,31 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    // An explicitly given media type shows up verbatim in the resulting data URL
    #[test]
    fn encode_string_with_specific_media_type() {
        let script = "var word = 'hello';\nalert(word);\n";

        let encoded = url::data_to_data_url("application/javascript", script.as_bytes(), "");

        assert_eq!(
            &encoded,
            "data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
        );
    }

    // NOTE(review): despite the test's name, the expected data URL carries no fragment
    #[test]
    fn encode_append_fragment() {
        let svg = "<svg></svg>\n";

        let encoded = url::data_to_data_url("image/svg+xml", svg.as_bytes(), "");

        assert_eq!(&encoded, "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K");
    }
}

View file

@ -1,39 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    // Percent-encoded multi-byte sequences come back as the characters they encode
    #[test]
    fn decode_unicode_characters() {
        let encoded = str!(
            "%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
        );

        let decoded = url::decode_url(encoded);

        assert_eq!(decoded, "検ヒム解塗ゃッ = サ");
    }

    // Both %20 (space) and %23 (#) get decoded inside a file: URL
    #[test]
    fn decode_file_url() {
        let encoded = str!("file:///tmp/space%20here/test%231.html");

        let decoded = url::decode_url(encoded);

        assert_eq!(decoded, "file:///tmp/space here/test#1.html");
    }

    // A literal plus sign has to survive decoding unchanged
    #[test]
    fn plus_sign() {
        let input =
            str!("fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic");

        let decoded = url::decode_url(input);

        assert_eq!(
            decoded,
            "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
        );
    }
}

View file

@ -1,41 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    // The "file://" prefix and the "#fragment" suffix must both be dropped.
    // Windows builds additionally expect backslashes as path separators.
    #[test]
    fn remove_protocol_and_fragment() {
        let (file_url, fs_path) = if cfg!(windows) {
            (
                "file:///C:/documents/some-path/some-file.svg#fragment",
                "C:\\documents\\some-path\\some-file.svg",
            )
        } else {
            (
                "file:///tmp/some-path/some-file.svg#fragment",
                "/tmp/some-path/some-file.svg",
            )
        };

        assert_eq!(url::file_url_to_fs_path(file_url), fs_path);
    }

    // Percent-encoded characters (%20 here) must be decoded in the resulting path
    #[test]
    fn decodes_urls() {
        let (file_url, fs_path) = if cfg!(windows) {
            (
                "file:///C:/Documents%20and%20Settings/some-file.html",
                "C:\\Documents and Settings\\some-file.html",
            )
        } else {
            (
                "file:///home/user/My%20Documents",
                "/home/user/My Documents",
            )
        };

        assert_eq!(url::file_url_to_fs_path(file_url), fs_path);
    }
}

View file

@ -1,48 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    // The part following '#' is returned without the '#' itself
    #[test]
    fn data_url() {
        let input = "data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test";

        let fragment = url::get_url_fragment(input);

        assert_eq!(fragment, "test");
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    // A trailing '#' with nothing after it yields an empty fragment
    #[test]
    fn https_empty() {
        let fragment = url::get_url_fragment("https://kernel.org#");
        assert_eq!(fragment, "");
    }

    // No '#' at all yields an empty fragment as well
    #[test]
    fn no_fragment() {
        let fragment = url::get_url_fragment("https://kernel.org");
        assert_eq!(fragment, "");
    }

    #[test]
    fn dummy_data_url() {
        let fragment = url::get_url_fragment("data:text/html,");
        assert_eq!(fragment, "");
    }
}

View file

@ -1,52 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    #[test]
    fn data_url_text_html() {
        let input = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
        assert!(url::is_data_url(input));
    }

    // A data URL is still recognized when its media type is left out
    #[test]
    fn data_url_no_media_type() {
        let input = "data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
        assert!(url::is_data_url(input));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    #[test]
    fn https_url() {
        let input = "https://kernel.org";
        assert!(!url::is_data_url(input));
    }

    // Protocol-relative URLs are not data URLs
    #[test]
    fn no_protocol_url() {
        let input = "//kernel.org";
        assert!(!url::is_data_url(input));
    }

    #[test]
    fn empty_string() {
        let input = "";
        assert!(!url::is_data_url(input));
    }
}

View file

@ -1,83 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    #[test]
    fn unix_file_url() {
        let input = "file:///home/user/Websites/my-website/index.html";
        assert!(url::is_file_url(input));
    }

    #[test]
    fn windows_file_url() {
        let input =
            "file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png";
        assert!(url::is_file_url(input));
    }

    // Backslashes in place of forward slashes must still be accepted
    #[test]
    fn unix_url_with_backslashes() {
        let input = "file:\\\\\\home\\user\\Websites\\my-website\\index.html";
        assert!(url::is_file_url(input));
    }

    #[test]
    fn windows_file_url_with_backslashes() {
        let input = "file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png";
        assert!(url::is_file_url(input));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    // Protocol-relative URLs carry no scheme, hence cannot be file: URLs
    #[test]
    fn url_with_no_protocol() {
        assert!(!url::is_file_url("//kernel.org"));
    }

    // Relative file system paths are not URLs at all
    #[test]
    fn dot_slash_filename() {
        assert!(!url::is_file_url("./index.html"));
    }

    #[test]
    fn just_filename() {
        assert!(!url::is_file_url("some-local-page.htm"));
    }

    #[test]
    fn https_ip_port_url() {
        assert!(!url::is_file_url("https://1.2.3.4:80/www/index.html"));
    }

    #[test]
    fn data_url() {
        assert!(!url::is_file_url(
            "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
        ));
    }

    // The bare word "file" (no colon, no path) must not be mistaken for a file: URL
    #[test]
    fn just_word_file() {
        assert!(!url::is_file_url("file"));
    }
}

View file

@ -1,65 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    #[test]
    fn http_url() {
        let input = "http://kernel.org";
        assert!(url::is_http_url(input));
    }

    // https counts as an HTTP URL too
    #[test]
    fn https_url() {
        let input = "https://www.rust-lang.org/";
        assert!(url::is_http_url(input));
    }

    // Backslashes in place of forward slashes must still be accepted
    #[test]
    fn http_url_with_backslashes() {
        let input = "http:\\\\freebsd.org\\";
        assert!(url::is_http_url(input));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    // Protocol-relative URLs carry no scheme, hence cannot be http(s) URLs
    #[test]
    fn url_with_no_protocol() {
        assert!(!url::is_http_url("//kernel.org"));
    }

    // Relative file system paths are not URLs at all
    #[test]
    fn dot_slash_filename() {
        assert!(!url::is_http_url("./index.html"));
    }

    #[test]
    fn just_filename() {
        assert!(!url::is_http_url("some-local-page.htm"));
    }

    // A URL with a scheme other than http/https (ftp here) must be rejected
    #[test]
    fn ftp_ip_url() {
        assert!(!url::is_http_url("ftp://1.2.3.4/www/index.html"));
    }

    #[test]
    fn data_url() {
        assert!(!url::is_http_url(
            "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
        ));
    }
}

View file

@ -1,12 +0,0 @@
mod clean_url;
mod data_to_data_url;
mod decode_url;
mod file_url_to_fs_path;
mod get_url_fragment;
mod is_data_url;
mod is_file_url;
mod is_http_url;
mod parse_data_url;
mod resolve_url;
mod url_has_protocol;
mod url_with_fragment;

View file

@ -1,40 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    // A non-empty fragment is appended after a '#'
    #[test]
    fn url_with_fragment_url() {
        let assembled_url = url::url_with_fragment("https://localhost.localdomain/path/", "test");

        assert_eq!(&assembled_url, "https://localhost.localdomain/path/#test");
    }

    // NOTE(review): despite the test's name, it is the fragment that is empty here;
    // the URL is expected to come back without a trailing '#'
    #[test]
    fn url_with_fragment_empty_url() {
        let assembled_url = url::url_with_fragment("https://localhost.localdomain/path/", "");

        assert_eq!(&assembled_url, "https://localhost.localdomain/path/");
    }

    #[test]
    fn url_with_fragment_data_url() {
        let assembled_url =
            url::url_with_fragment("data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K", "fragment");

        assert_eq!(
            &assembled_url,
            "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K#fragment"
        );
    }
}

View file

@ -1,167 +1,82 @@
use base64; use base64;
use url::{form_urlencoded, ParseError, Url}; use percent_encoding::percent_decode_str;
use url::Url;
use crate::utils::detect_media_type; use crate::utils::{detect_media_type, parse_content_type};
pub fn clean_url<T: AsRef<str>>(input: T) -> String { pub const EMPTY_IMAGE_DATA_URL: &'static str = "data:image/png;base64,\
let mut url = Url::parse(input.as_ref()).unwrap(); iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII=";
// Clear fragment pub fn clean_url(url: Url) -> Url {
let mut url = url.clone();
// Clear fragment (if any)
url.set_fragment(None); url.set_fragment(None);
// Get rid of stray question mark url
if url.query() == Some("") {
url.set_query(None);
} }
// Remove empty trailing ampersand(s) pub fn create_data_url(media_type: &str, charset: &str, data: &[u8], final_asset_url: &Url) -> Url {
let mut result: String = url.to_string(); // TODO: move this block out of this function
while result.ends_with("&") {
result.pop();
}
result
}
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
let media_type: String = if media_type.is_empty() { let media_type: String = if media_type.is_empty() {
detect_media_type(data, &url) detect_media_type(data, &final_asset_url)
} else { } else {
media_type.to_string() media_type.to_string()
}; };
format!("data:{};base64,{}", media_type, base64::encode(data)) let mut data_url: Url = Url::parse("data:,").unwrap();
}
pub fn decode_url(input: String) -> String { let c: String =
let input: String = input.replace("+", "%2B"); if !charset.trim().is_empty() && !charset.trim().eq_ignore_ascii_case("US-ASCII") {
format!(";charset={}", charset.trim())
form_urlencoded::parse(input.as_bytes())
.map(|(key, val)| {
[
key.to_string(),
if val.to_string().len() == 0 {
str!()
} else { } else {
str!('=') "".to_string()
}, };
val.to_string(),
] data_url.set_path(format!("{}{};base64,{}", media_type, c, base64::encode(data)).as_str());
.concat()
}) data_url
.collect()
} }
pub fn file_url_to_fs_path(url: &str) -> String { pub fn is_url_and_has_protocol(input: &str) -> bool {
if !is_file_url(url) { match Url::parse(&input) {
return str!(); Ok(parsed_url) => {
return parsed_url.scheme().len() > 0;
} }
Err(_) => {
let cutoff_l = if cfg!(windows) { 8 } else { 7 }; return false;
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
let url_fragment = get_url_fragment(url);
if url_fragment != "" {
let max_len = fs_file_path.len() - 1 - url_fragment.len();
fs_file_path = fs_file_path[0..max_len].to_string();
} }
if cfg!(windows) {
fs_file_path = fs_file_path.replace("/", "\\");
}
// File paths should not be %-encoded
decode_url(fs_file_path)
}
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
match Url::parse(url.as_ref()) {
Ok(parsed_url) => parsed_url.fragment().unwrap_or("").to_string(),
Err(_err) => str!(),
} }
} }
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool { pub fn parse_data_url(url: &Url) -> (String, String, Vec<u8>) {
Url::parse(url.as_ref()) let path: String = url.path().to_string();
.and_then(|u| Ok(u.scheme() == "data"))
.unwrap_or(false)
}
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "file"))
.unwrap_or(false)
}
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
.unwrap_or(false)
}
pub fn parse_data_url<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len()); let comma_loc: usize = path.find(',').unwrap_or(path.len());
let meta_data: String = path.chars().take(comma_loc).collect(); // Split data URL into meta data and raw data
let raw_data: String = path.chars().skip(comma_loc + 1).collect(); let content_type: String = path.chars().take(comma_loc).collect();
let data: String = path.chars().skip(comma_loc + 1).collect();
let text: String = decode_url(raw_data); // Parse meta data
let (media_type, charset, is_base64) = parse_content_type(&content_type);
let meta_data_items: Vec<&str> = meta_data.split(';').collect(); // Parse raw data into vector of bytes
let mut media_type: String = str!(); let text: String = percent_decode_str(&data).decode_utf8_lossy().to_string();
let mut encoding: &str = ""; let blob: Vec<u8> = if is_base64 {
let mut i: i8 = 0;
for item in &meta_data_items {
if i == 0 {
media_type = str!(item);
} else {
if item.eq_ignore_ascii_case("base64")
|| item.eq_ignore_ascii_case("utf8")
|| item.eq_ignore_ascii_case("charset=UTF-8")
{
encoding = item;
}
}
i = i + 1;
}
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
base64::decode(&text).unwrap_or(vec![]) base64::decode(&text).unwrap_or(vec![])
} else { } else {
text.as_bytes().to_vec() text.as_bytes().to_vec()
}; };
(media_type, data) (media_type, charset, blob)
} }
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> { pub fn resolve_url(from: &Url, to: &str) -> Url {
let result = if is_http_url(to.as_ref()) { match Url::parse(&to) {
to.as_ref().to_string() Ok(parsed_url) => parsed_url,
} else { Err(_) => match from.join(to) {
Url::parse(from.as_ref())? Ok(joined) => joined,
.join(to.as_ref())? Err(_) => Url::parse("data:,").unwrap(),
.as_ref() },
.to_string()
};
Ok(result)
} }
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme().len() > 0))
.unwrap_or(false)
}
pub fn url_with_fragment(url: &str, fragment: &str) -> String {
let mut result = str!(&url);
if !fragment.is_empty() {
result += "#";
result += fragment;
}
result
} }

View file

@ -2,13 +2,14 @@ use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE; use reqwest::header::CONTENT_TYPE;
use std::collections::HashMap; use std::collections::HashMap;
use std::fs; use std::fs;
use std::path::Path; use std::path::{Path, PathBuf};
use url::Url;
use crate::opts::Options; use crate::opts::Options;
use crate::url::{clean_url, file_url_to_fs_path, is_data_url, is_file_url, parse_data_url}; use crate::url::{clean_url, parse_data_url};
const INDENT: &str = " ";
const ANSI_COLOR_RED: &'static str = "\x1b[31m";
const ANSI_COLOR_RESET: &'static str = "\x1b[0m";
const MAGIC: [[&[u8]; 2]; 18] = [ const MAGIC: [[&[u8]; 2]; 18] = [
// Image // Image
[b"GIF87a", b"image/gif"], [b"GIF87a", b"image/gif"],
@ -32,81 +33,251 @@ const MAGIC: [[&[u8]; 2]; 18] = [
[b"....moov", b"video/quicktime"], [b"....moov", b"video/quicktime"],
[b"\x1A\x45\xDF\xA3", b"video/webm"], [b"\x1A\x45\xDF\xA3", b"video/webm"],
]; ];
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[ const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
"application/javascript",
"application/json",
"image/svg+xml", "image/svg+xml",
"text/css",
"text/html",
"text/javascript",
"text/plain",
]; ];
pub fn detect_media_type(data: &[u8], url: &str) -> String { pub fn detect_media_type(data: &[u8], url: &Url) -> String {
for item in MAGIC.iter() { // At first attempt to read file's header
if data.starts_with(item[0]) { for magic_item in MAGIC.iter() {
return String::from_utf8(item[1].to_vec()).unwrap(); if data.starts_with(magic_item[0]) {
return String::from_utf8(magic_item[1].to_vec()).unwrap();
} }
} }
if url.to_lowercase().ends_with(".svg") { // If header didn't match any known magic signatures,
return str!("image/svg+xml"); // try to guess media type from file name
let parts: Vec<&str> = url.path().split('/').collect();
detect_media_type_by_file_name(parts.last().unwrap())
} }
str!() pub fn detect_media_type_by_file_name(filename: &str) -> String {
let filename_lowercased: &str = &filename.to_lowercase();
let parts: Vec<&str> = filename_lowercased.split('.').collect();
let mime: &str = match parts.last() {
Some(v) => match *v {
"avi" => "video/avi",
"bmp" => "image/bmp",
"css" => "text/css",
"flac" => "audio/flac",
"gif" => "image/gif",
"htm" | "html" => "text/html",
"ico" => "image/x-icon",
"jpeg" | "jpg" => "image/jpeg",
"js" => "application/javascript",
"json" => "application/json",
"mp3" => "audio/mpeg",
"mp4" | "m4v" => "video/mp4",
"ogg" => "audio/ogg",
"ogv" => "video/ogg",
"pdf" => "application/pdf",
"png" => "image/png",
"svg" => "image/svg+xml",
"swf" => "application/x-shockwave-flash",
"tif" | "tiff" => "image/tiff",
"txt" => "text/plain",
"wav" => "audio/wav",
"webp" => "image/webp",
"woff" => "font/woff",
"woff2" => "font/woff2",
"xml" => "text/xml",
&_ => "",
},
None => "",
};
mime.to_string()
} }
pub fn is_plaintext_media_type(media_type: &str) -> bool { pub fn domain_is_within_domain(domain: &str, domain_to_match_against: &str) -> bool {
PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str()) if domain_to_match_against.len() == 0 {
return false;
}
if domain_to_match_against == "." {
return true;
}
let domain_partials: Vec<&str> = domain.trim_end_matches(".").rsplit(".").collect();
let domain_to_match_against_partials: Vec<&str> = domain_to_match_against
.trim_end_matches(".")
.rsplit(".")
.collect();
let domain_to_match_against_starts_with_a_dot = domain_to_match_against.starts_with(".");
let mut i: usize = 0;
let l: usize = std::cmp::max(
domain_partials.len(),
domain_to_match_against_partials.len(),
);
let mut ok: bool = true;
while i < l {
// Exit and return false if went out of bounds of domain to match against, and it didn't start with a dot
if !domain_to_match_against_starts_with_a_dot
&& domain_to_match_against_partials.len() < i + 1
{
ok = false;
break;
}
let domain_partial = if domain_partials.len() < i + 1 {
""
} else {
domain_partials.get(i).unwrap()
};
let domain_to_match_against_partial = if domain_to_match_against_partials.len() < i + 1 {
""
} else {
domain_to_match_against_partials.get(i).unwrap()
};
let parts_match = domain_to_match_against_partial.eq_ignore_ascii_case(domain_partial);
if !parts_match && domain_to_match_against_partial.len() != 0 {
ok = false;
break;
}
i += 1;
}
ok
} }
pub fn indent(level: u32) -> String { pub fn indent(level: u32) -> String {
let mut result = str!(); let mut result: String = String::new();
let mut l: u32 = level; let mut l: u32 = level;
while l > 0 { while l > 0 {
result += INDENT; result += " ";
l -= 1; l -= 1;
} }
result result
} }
pub fn is_plaintext_media_type(media_type: &str) -> bool {
media_type.to_lowercase().as_str().starts_with("text/")
|| PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
}
pub fn parse_content_type(content_type: &str) -> (String, String, bool) {
let mut media_type: String = "text/plain".to_string();
let mut charset: String = "US-ASCII".to_string();
let mut is_base64: bool = false;
// Parse meta data
let content_type_items: Vec<&str> = content_type.split(';').collect();
let mut i: i8 = 0;
for item in &content_type_items {
if i == 0 {
if item.trim().len() > 0 {
media_type = item.trim().to_string();
}
} else {
if item.trim().eq_ignore_ascii_case("base64") {
is_base64 = true;
} else if item.trim().starts_with("charset=") {
charset = item.trim().chars().skip(8).collect();
}
}
i += 1;
}
(media_type, charset, is_base64)
}
pub fn retrieve_asset( pub fn retrieve_asset(
cache: &mut HashMap<String, Vec<u8>>, cache: &mut HashMap<String, Vec<u8>>,
client: &Client, client: &Client,
parent_url: &str, parent_url: &Url,
url: &str, url: &Url,
options: &Options, options: &Options,
depth: u32, depth: u32,
) -> Result<(Vec<u8>, String, String), reqwest::Error> { ) -> Result<(Vec<u8>, Url, String, String), reqwest::Error> {
if url.len() == 0 { if url.scheme() == "data" {
let (media_type, charset, data) = parse_data_url(url);
Ok((data, url.clone(), media_type, charset))
} else if url.scheme() == "file" {
// Check if parent_url is also a file: URL (if not, then we don't embed the asset)
if parent_url.scheme() != "file" {
if !options.silent {
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
"Security Error",
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error // Provoke error
client.get("").send()?; client.get("").send()?;
} }
if is_data_url(&url) { let path_buf: PathBuf = url.to_file_path().unwrap().clone();
let (media_type, data) = parse_data_url(url); let path: &Path = path_buf.as_path();
Ok((data, url.to_string(), media_type))
} else if is_file_url(&url) {
// Check if parent_url is also file:///
// (if not, then we don't embed the asset)
if !is_file_url(&parent_url) {
// Provoke error
client.get("").send()?;
}
let fs_file_path: String = file_url_to_fs_path(url);
let path = Path::new(&fs_file_path);
if path.exists() { if path.exists() {
if path.is_dir() {
if !options.silent {
eprintln!(
"{}{}{} (is a directory){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error
Err(client.get("").send().unwrap_err())
} else {
if !options.silent { if !options.silent {
eprintln!("{}{}", indent(depth).as_str(), &url); eprintln!("{}{}", indent(depth).as_str(), &url);
} }
Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!())) let file_blob: Vec<u8> = fs::read(&path).expect("Unable to read file");
Ok((
file_blob.clone(),
url.clone(),
detect_media_type(&file_blob, url),
"".to_string(),
))
}
} else { } else {
if !options.silent {
eprintln!(
"{}{}{} (not found){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error // Provoke error
Err(client.get("").send().unwrap_err()) Err(client.get("").send().unwrap_err())
} }
} else { } else {
let cache_key: String = clean_url(&url); let cache_key: String = clean_url(url.clone()).as_str().to_string();
if cache.contains_key(&cache_key) { if cache.contains_key(&cache_key) {
// URL is in cache, we get and return it // URL is in cache, we get and return it
@ -116,50 +287,112 @@ pub fn retrieve_asset(
Ok(( Ok((
cache.get(&cache_key).unwrap().to_vec(), cache.get(&cache_key).unwrap().to_vec(),
url.to_string(), url.clone(),
str!(), "".to_string(),
"".to_string(),
)) ))
} else { } else {
if let Some(domains) = &options.domains {
let domain_matches = domains
.iter()
.any(|d| domain_is_within_domain(url.host_str().unwrap(), &d.trim()));
if (options.exclude_domains && domain_matches)
|| (!options.exclude_domains && !domain_matches)
{
return Err(client.get("").send().unwrap_err());
}
}
// URL not in cache, we retrieve the file // URL not in cache, we retrieve the file
match client.get(url).send() { match client.get(url.as_str()).send() {
Ok(mut response) => { Ok(response) => {
if !options.ignore_errors && response.status() != 200 { if !options.ignore_errors && response.status() != reqwest::StatusCode::OK {
if !options.silent { if !options.silent {
eprintln!("Unable to retrieve {} ({})", &url, response.status()); eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
response.status(),
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
} }
// Provoke error // Provoke error
return Err(client.get("").send().unwrap_err()); return Err(client.get("").send().unwrap_err());
} }
let res_url = response.url().to_string(); let response_url: Url = response.url().clone();
if !options.silent { if !options.silent {
if url == res_url { if url.as_str() == response_url.as_str() {
eprintln!("{}{}", indent(depth).as_str(), &url); eprintln!("{}{}", indent(depth).as_str(), &url);
} else { } else {
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url); eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &response_url);
} }
} }
let new_cache_key: String = clean_url(&res_url); let new_cache_key: String = clean_url(response_url.clone()).to_string();
// Convert response into a byte array // Attempt to obtain media type and charset by reading Content-Type header
let mut data: Vec<u8> = vec![]; let content_type: &str = response
response.copy_to(&mut data)?;
// Attempt to obtain media type by reading the Content-Type header
let media_type = response
.headers() .headers()
.get(CONTENT_TYPE) .get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok()) .and_then(|header| header.to_str().ok())
.unwrap_or(""); .unwrap_or("");
let (media_type, charset, _is_base64) = parse_content_type(&content_type);
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
match response.bytes() {
Ok(b) => {
data = b.to_vec();
}
Err(error) => {
if !options.silent {
eprintln!(
"{}{}{}{}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
error,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
}
}
// Add retrieved resource to cache // Add retrieved resource to cache
cache.insert(new_cache_key, data.clone()); cache.insert(new_cache_key, data.clone());
Ok((data, res_url, media_type.to_string())) // Return
Ok((data, response_url, media_type, charset))
}
Err(error) => {
if !options.silent {
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
error,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
Err(client.get("").send().unwrap_err())
} }
Err(error) => Err(error),
} }
} }
} }

View file

@ -0,0 +1,11 @@
<style>
@charset 'UTF-8';
@import 'style.css';
@import url(style.css);
@import url('style.css');
</style>

View file

@ -0,0 +1 @@
body{background-color:#000;color:#fff}

View file

@ -3,8 +3,6 @@
<html lang="en"> <html lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Security-Policy" content="default-src 'unsafe-inline' file:;" />
<title>Local HTML file</title> <title>Local HTML file</title>
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-IWaCTORHkRhOWzcZeILSVmV6V6gPTHgNem6o6rsFAyaKTieDFkeeMrWjtO0DuWrX3bqZY46CVTZXUu0mia0qXQ==" crossorigin="anonymous" /> <link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-IWaCTORHkRhOWzcZeILSVmV6V6gPTHgNem6o6rsFAyaKTieDFkeeMrWjtO0DuWrX3bqZY46CVTZXUu0mia0qXQ==" crossorigin="anonymous" />
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-vWBzl4NE9oIg8NFOPAyOZbaam0UXWr6aDHPaY2kodSzAFl+mKoj/RMNc6C31NDqK4mE2i68IWxYWqWJPLCgPOw==" crossorigin="anonymous" /> <link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-vWBzl4NE9oIg8NFOPAyOZbaam0UXWr6aDHPaY2kodSzAFl+mKoj/RMNc6C31NDqK4mE2i68IWxYWqWJPLCgPOw==" crossorigin="anonymous" />

View file

@ -0,0 +1,5 @@
<svg version="1.1" baseProfile="full" width="300" height="200" xmlns="http://www.w3.org/2000/svg">
<rect width="100%" height="100%" fill="red" />
<circle cx="150" cy="100" r="80" fill="green" />
<text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text>
</svg>

After

Width:  |  Height:  |  Size: 296 B

View file

@ -0,0 +1 @@
<body><noscript><img src="image.svg" /></noscript></body>

View file

@ -0,0 +1 @@
<body><noscript><h1>JS is not active</h1><noscript><img src="image.svg" /></noscript></noscript></body>

View file

@ -0,0 +1 @@
<body><noscript><script>alert(1);</script><img src="image.svg" /></noscript></body>

View file

@ -0,0 +1,5 @@
<svg version="1.1" baseProfile="full" width="300" height="200" xmlns="http://www.w3.org/2000/svg">
<rect width="100%" height="100%" fill="red" />
<circle cx="150" cy="100" r="80" fill="green" />
<text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text>
</svg>

After

Width:  |  Height:  |  Size: 296 B

View file

@ -0,0 +1 @@
<div style="background-image: url('image.svg')"></div>

View file

@ -0,0 +1,9 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=GB2312"/>
<title>近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网 </title>
</head>
<body>
<h1>近七成人减少线下需求 银行数字化转型提速</h1>
</body>
</html>

View file

@ -0,0 +1,8 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
&copy; Some Company
</body>
</html>

View file

@ -12,8 +12,8 @@ mod passing {
use std::process::Command; use std::process::Command;
#[test] #[test]
fn add_new_when_provided() -> Result<(), Box<dyn std::error::Error>> { fn add_new_when_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd let out = cmd
.arg("-M") .arg("-M")
.arg("-b") .arg("-b")
@ -22,52 +22,48 @@ mod passing {
.output() .output()
.unwrap(); .unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain newly added base URL // STDOUT should contain newly added base URL
assert_eq!( assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(), String::from_utf8_lossy(&out.stdout),
"<html><head>\ "<html><head>\
<base href=\"http://localhost:8000/\"></base>\ <base href=\"http://localhost:8000/\"></base>\
</head><body>Hello, World!</body></html>\n" </head><body>Hello, World!</body></html>\n"
); );
// STDERR should be empty // Exit code should be 0
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0); out.assert().code(0);
Ok(())
} }
#[test] #[test]
fn keep_existing_when_none_provided() -> Result<(), Box<dyn std::error::Error>> { fn keep_existing_when_none_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd let out = cmd
.arg("-M") .arg("-M")
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!") .arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
.output() .output()
.unwrap(); .unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain newly added base URL // STDOUT should contain newly added base URL
assert_eq!( assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(), String::from_utf8_lossy(&out.stdout),
"<html><head>\ "<html><head>\
<base href=\"http://localhost:8000/\">\ <base href=\"http://localhost:8000/\">\
</head><body>Hello, World!</body></html>\n" </head><body>Hello, World!</body></html>\n"
); );
// STDERR should be empty // Exit code should be 0
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0); out.assert().code(0);
Ok(())
} }
#[test] #[test]
fn override_existing_when_provided() -> Result<(), Box<dyn std::error::Error>> { fn override_existing_when_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd let out = cmd
.arg("-M") .arg("-M")
.arg("-b") .arg("-b")
@ -76,26 +72,24 @@ mod passing {
.output() .output()
.unwrap(); .unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain newly added base URL // STDOUT should contain newly added base URL
assert_eq!( assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(), String::from_utf8_lossy(&out.stdout),
"<html><head>\ "<html><head>\
<base href=\"http://localhost/\">\ <base href=\"http://localhost/\">\
</head><body>Hello, World!</body></html>\n" </head><body>Hello, World!</body></html>\n"
); );
// STDERR should be empty // Exit code should be 0
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0); out.assert().code(0);
Ok(())
} }
#[test] #[test]
fn remove_existing_when_empty_provided() -> Result<(), Box<dyn std::error::Error>> { fn set_existing_to_empty_when_empty_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd let out = cmd
.arg("-M") .arg("-M")
.arg("-b") .arg("-b")
@ -104,20 +98,18 @@ mod passing {
.output() .output()
.unwrap(); .unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain newly added base URL // STDOUT should contain newly added base URL
assert_eq!( assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(), String::from_utf8_lossy(&out.stdout),
"<html><head>\ "<html><head>\
<base href=\"\">\ <base href=\"\">\
</head><body>Hello, World!</body></html>\n" </head><body>Hello, World!</body></html>\n"
); );
// STDERR should be empty // Exit code should be 0
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0); out.assert().code(0);
Ok(())
} }
} }

144
tests/cli/basic.rs Normal file
View file

@ -0,0 +1,144 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::fs;
use std::path::Path;
use std::process::{Command, Stdio};
use url::Url;
#[test]
fn print_help_information() {
    // Ask the binary under test for its help text
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("-h")
        .output()
        .unwrap();

    // Nothing may be written to STDERR
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(stderr, "");

    // TODO: verify that STDOUT holds the program name, version, and usage information

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn print_version() {
    // Ask the binary under test for its version string
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("-V")
        .output()
        .unwrap();

    // Nothing may be written to STDERR
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(stderr, "");

    // STDOUT must be "<package name> <package version>" followed by a newline
    let stdout = String::from_utf8_lossy(&output.stdout);
    let expected = format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"));
    assert_eq!(stdout, expected);

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn stdin_target_input() {
    use std::io::Write;

    // Spawn the binary with all three standard streams piped, so that the
    // input can be written directly instead of relying on an external `echo`
    // executable (which is not a spawnable program on every platform)
    let mut child = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("-M")
        .arg("-")
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .unwrap();

    // Feed the same bytes `echo "Hello from STDIN"` would produce; the pipe
    // handle is dropped at the end of the statement, which signals EOF
    child
        .stdin
        .take()
        .unwrap()
        .write_all(b"Hello from STDIN\n")
        .unwrap();

    let out = child.wait_with_output().unwrap();

    // STDERR should be empty
    assert_eq!(String::from_utf8_lossy(&out.stderr), "");

    // STDOUT should contain HTML created out of STDIN
    assert_eq!(
        String::from_utf8_lossy(&out.stdout),
        "<html><head></head><body>Hello from STDIN\n</body></html>\n"
    );

    // Exit code should be 0
    out.assert().code(0);
}
#[test]
fn css_import_string() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let html_path: &Path = Path::new("tests/_data_/css/index.html");
    let css_path: &Path = Path::new("tests/_data_/css/style.css");

    // Both fixtures have to be present before the binary is run
    assert!(html_path.is_file());
    assert!(css_path.is_file());

    let output = monolith
        .arg("-M")
        .arg(html_path.as_os_str())
        .output()
        .unwrap();

    // STDERR must list the document once and the stylesheet once per @import
    let stderr = String::from_utf8_lossy(&output.stderr);
    let html_url = Url::from_file_path(fs::canonicalize(html_path).unwrap()).unwrap();
    let css_url = Url::from_file_path(fs::canonicalize(css_path).unwrap()).unwrap();
    assert_eq!(
        stderr,
        format!(
            "\
            {file_url_html}\n \
            {file_url_css}\n \
            {file_url_css}\n \
            {file_url_css}\n\
            ",
            file_url_html = html_url,
            file_url_css = css_url,
        )
    );

    // STDOUT must carry every @import rewritten into an embedded data URL
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(
        stdout,
        "<html><head><style>\n\n @charset \"UTF-8\";\n\n @import \"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\";\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n</style>\n</head><body></body></html>\n"
    );

    // The process must report exit code 0
    output.assert().code(0);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
#[test]
fn bad_input_empty_target() {
    // Run the binary with an empty string as its only argument
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("")
        .output()
        .unwrap();

    // STDERR must describe the problem
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(stderr, "No target specified\n");

    // Nothing may be written to STDOUT
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(stdout, "");

    // The process must report exit code 1
    output.assert().code(1);
}
}

233
tests/cli/data_url.rs Normal file
View file

@ -0,0 +1,233 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn isolate_data_url() {
    // Run the binary with -M and -I against an inline HTML document
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("-M")
        .arg("-I")
        .arg("data:text/html,Hello%2C%20World!")
        .output()
        .unwrap();

    // Nothing may be written to STDERR
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(stderr, "");

    // STDOUT must carry the document plus the isolating CSP meta tag
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(
        stdout,
        "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:;\"></meta>\
        </head><body>Hello, World!</body></html>\n"
    );

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn remove_css_from_data_url() {
    // Run the binary with -M and -c against a document that has a <style> block
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("-M")
        .arg("-c")
        .arg("data:text/html,<style>body{background-color:pink}</style>Hello")
        .output()
        .unwrap();

    // Nothing may be written to STDERR
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(stderr, "");

    // STDOUT must carry an emptied <style> element and a style-blocking CSP
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(
        stdout,
        "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
        <style></style>\
        </head><body>Hello</body></html>\n"
    );

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn remove_fonts_from_data_url() {
    // Run the binary with -M and -F against a document declaring a web font
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("-M")
        .arg("-F")
        .arg("data:text/html,<style>@font-face { font-family: myFont; src: url(font.woff); }</style>Hi")
        .output()
        .unwrap();

    // Nothing may be written to STDERR
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(stderr, "");

    // STDOUT must carry an emptied <style> element and a font-blocking CSP
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(
        stdout,
        "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"font-src 'none';\"></meta>\
        <style></style>\
        </head><body>Hi</body></html>\n"
    );

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn remove_frames_from_data_url() {
    // Run the binary with -M and -f against a document embedding an iframe
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("-M")
        .arg("-f")
        .arg("data:text/html,<iframe src=\"https://duckduckgo.com\"></iframe>Hi")
        .output()
        .unwrap();

    // Nothing may be written to STDERR
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(stderr, "");

    // STDOUT must carry the iframe with a blank src and a frame-blocking CSP
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(
        stdout,
        "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
        </head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
    );

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn remove_images_from_data_url() {
    // Run the binary with -M and -i against a document holding a remote image
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("-M")
        .arg("-i")
        .arg("data:text/html,<img src=\"https://google.com\"/>Hi")
        .output()
        .unwrap();

    // Nothing may be written to STDERR
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(stderr, "");

    // STDOUT must carry the image swapped for the placeholder data URL
    let stdout = String::from_utf8_lossy(&output.stdout);
    let expected = format!(
        "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\
        </head>\
        <body>\
        <img src=\"{empty_image}\">\
        Hi\
        </body>\
        </html>\n",
        empty_image = EMPTY_IMAGE_DATA_URL,
    );
    assert_eq!(stdout, expected);

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn remove_js_from_data_url() {
    // Run the binary with -M and -j against a document with an inline script
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("-M")
        .arg("-j")
        .arg("data:text/html,<script>alert(2)</script>Hi")
        .output()
        .unwrap();

    // Nothing may be written to STDERR
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(stderr, "");

    // STDOUT must carry an emptied <script> element and a script-blocking CSP
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(
        stdout,
        "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
        <script></script></head>\
        <body>Hi</body>\
        </html>\n"
    );

    // The process must report exit code 0
    output.assert().code(0);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
#[test]
fn bad_input_data_url() {
    // Run the binary against a data URL that declares no media type
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("data:,Hello%2C%20World!")
        .output()
        .unwrap();

    // STDERR must describe the problem
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(stderr, "Unsupported document media type\n");

    // Nothing may be written to STDOUT
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(stdout, "");

    // The process must report exit code 1
    output.assert().code(1);
}
#[test]
fn security_disallow_local_assets_within_data_url_targets() {
    // Run the binary against a data URL document whose <script> points at a local path
    // NOTE(review): the referenced path starts with "src/tests/data/", while the other CLI
    // tests read fixtures from "tests/_data_/" -- confirm this path still names a real file
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .arg("-M")
        .arg("data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E")
        .output()
        .unwrap();

    // Nothing may be written to STDERR
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(stderr, "");

    // STDOUT must carry the script element with an empty data URL payload
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(
        stdout,
        "<html><head><script src=\"data:application/javascript;base64,\"></script></head><body></body></html>\n"
    );

    // The process must report exit code 0
    output.assert().code(0);
}
}

271
tests/cli/local_files.rs Normal file
View file

@ -0,0 +1,271 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::fs;
use std::path::{Path, MAIN_SEPARATOR};
use std::process::Command;
use url::Url;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn local_file_target_input_relative_target_path() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();

    // Working directory with forward slashes, as it appears inside file URLs
    let cwd: String = env::current_dir()
        .unwrap()
        .to_str()
        .unwrap()
        .replace("\\", "/");

    // Relative target path assembled with the platform's own separator
    let target = format!(
        "tests{s}_data_{s}basic{s}local-file.html",
        s = MAIN_SEPARATOR
    );
    let output = monolith.arg("-M").arg(target).output().unwrap();

    let scheme: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDERR must list every attempted file URL, two of them marked as not found
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(
        stderr,
        format!(
            "\
            {file}{cwd}/tests/_data_/basic/local-file.html\n \
            {file}{cwd}/tests/_data_/basic/local-style.css\n \
            {file}{cwd}/tests/_data_/basic/local-style-does-not-exist.css (not found)\n \
            {file}{cwd}/tests/_data_/basic/monolith.png (not found)\n \
            {file}{cwd}/tests/_data_/basic/local-script.js\n\
            ",
            file = scheme,
            cwd = cwd
        )
    );

    // STDOUT must carry the local document with its available assets embedded
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(
        stdout,
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n \
        <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
        </body></html>\n\
        "
    );

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn local_file_target_input_absolute_target_path() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let html_path: &Path = Path::new("tests/_data_/basic/local-file.html");

    let output = monolith
        .arg("-M")
        .arg("-Ijci")
        .arg(html_path.as_os_str())
        .output()
        .unwrap();

    // STDERR must list nothing but the target document itself
    let stderr = String::from_utf8_lossy(&output.stderr);
    let html_url = Url::from_file_path(fs::canonicalize(html_path).unwrap()).unwrap();
    assert_eq!(
        stderr,
        format!("{file_url_html}\n", file_url_html = html_url)
    );

    // STDOUT must carry the local document with no assets embedded
    let stdout = String::from_utf8_lossy(&output.stdout);
    let expected = format!(
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link rel=\"stylesheet\" type=\"text/css\">\n \
        <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img src=\"{empty_image}\" alt=\"\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script></script>\n\n\n\n\
        </body></html>\n\
        ",
        empty_image = EMPTY_IMAGE_DATA_URL
    );
    assert_eq!(stdout, expected);

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn local_file_url_target_input() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();

    // Working directory with forward slashes, as it appears inside file URLs
    let cwd: String = env::current_dir()
        .unwrap()
        .to_str()
        .unwrap()
        .replace("\\", "/");
    let scheme: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // The target is handed over as a complete file URL
    let target_url = format!(
        "{file}{cwd}/tests/_data_/basic/local-file.html",
        file = scheme,
        cwd = cwd,
    );
    let output = monolith
        .arg("-M")
        .arg("-cji")
        .arg(target_url)
        .output()
        .unwrap();

    // STDERR must list nothing but the target document itself
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert_eq!(
        stderr,
        format!(
            "{file}{cwd}/tests/_data_/basic/local-file.html\n",
            file = scheme,
            cwd = cwd,
        )
    );

    // STDOUT must carry the local document with no assets embedded
    let stdout = String::from_utf8_lossy(&output.stdout);
    let expected = format!(
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link rel=\"stylesheet\" type=\"text/css\">\n \
        <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img src=\"{empty_image}\" alt=\"\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script></script>\n\n\n\n\
        </body></html>\n\
        ",
        empty_image = EMPTY_IMAGE_DATA_URL
    );
    assert_eq!(stdout, expected);

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn embed_file_url_local_asset_within_style_attribute() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let html_path: &Path = Path::new("tests/_data_/svg/index.html");
    let svg_path: &Path = Path::new("tests/_data_/svg/image.svg");

    let output = monolith
        .arg("-M")
        .arg(html_path.as_os_str())
        .output()
        .unwrap();

    // STDERR must list the document followed by the image it references
    let stderr = String::from_utf8_lossy(&output.stderr);
    let html_url = Url::from_file_path(fs::canonicalize(html_path).unwrap()).unwrap();
    let svg_url = Url::from_file_path(fs::canonicalize(svg_path).unwrap()).unwrap();
    assert_eq!(
        stderr,
        format!(
            "\
            {file_url_html}\n \
            {file_url_svg}\n\
            ",
            file_url_html = html_url,
            file_url_svg = svg_url,
        )
    );

    // STDOUT must carry the background-image rewritten into a data URL
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(
        stdout,
        "<html><head></head><body><div style=\"background-image: url(&quot;data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=&quot;)\"></div>\n</body></html>\n"
    );

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn discard_integrity_for_local_files() {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();

    // Working directory with forward slashes, as it appears inside file URLs
    let cwd_normalized: String = env::current_dir()
        .unwrap()
        .to_str()
        .unwrap()
        .replace("\\", "/");
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    let out = cmd
        .arg("-M")
        .arg("-i")
        // The URL layout is the same on every platform; only the scheme prefix
        // differs, and that is already captured by file_url_protocol
        .arg(format!(
            "{file}{cwd}/tests/_data_/integrity/index.html",
            file = file_url_protocol,
            cwd = cwd_normalized,
        ))
        .output()
        .unwrap();

    // STDERR should contain list of retrieved file URLs
    assert_eq!(
        String::from_utf8_lossy(&out.stderr),
        format!(
            "\
            {file}{cwd}/tests/_data_/integrity/index.html\n \
            {file}{cwd}/tests/_data_/integrity/style.css\n \
            {file}{cwd}/tests/_data_/integrity/style.css\n \
            {file}{cwd}/tests/_data_/integrity/script.js\n \
            {file}{cwd}/tests/_data_/integrity/script.js\n\
            ",
            file = file_url_protocol,
            cwd = cwd_normalized,
        )
    );

    // STDOUT should contain HTML from the local file; integrity attributes should be missing
    // (plain literal: the expected text has no placeholders to format)
    assert_eq!(
        String::from_utf8_lossy(&out.stdout),
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\n \
        <title>Local HTML file</title>\n \
        <link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNGRkY7Cn0K\" rel=\"stylesheet\" type=\"text/css\" crossorigin=\"anonymous\">\n \
        <link href=\"style.css\" rel=\"stylesheet\" type=\"text/css\" crossorigin=\"anonymous\">\n</head>\n\n<body>\n \
        <p>This page should have black background and white foreground, but only when served via http: (not via file:)</p>\n \
        <script src=\"data:application/javascript;base64,ZnVuY3Rpb24gbm9vcCgpIHsKICAgIGNvbnNvbGUubG9nKCJtb25vbGl0aCIpOwp9Cg==\"></script>\n \
        <script src=\"script.js\"></script>\n\n\n\n\
        </body></html>\n\
        "
    );

    // Exit code should be 0
    out.assert().code(0);
}
}

6
tests/cli/mod.rs Normal file
View file

@ -0,0 +1,6 @@
mod base_url;
mod basic;
mod data_url;
mod local_files;
mod noscript;
mod unusual_encodings;

170
tests/cli/noscript.rs Normal file
View file

@ -0,0 +1,170 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::fs;
use std::path::Path;
use std::process::Command;
use url::Url;
#[test]
fn parse_noscript_contents() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let html_path: &Path = Path::new("tests/_data_/noscript/index.html");
    let svg_path: &Path = Path::new("tests/_data_/noscript/image.svg");

    let output = monolith
        .arg("-M")
        .arg(html_path.as_os_str())
        .output()
        .unwrap();

    // STDERR must list the document followed by the image it references
    let stderr = String::from_utf8_lossy(&output.stderr);
    let html_url = Url::from_file_path(fs::canonicalize(html_path).unwrap()).unwrap();
    let svg_url = Url::from_file_path(fs::canonicalize(svg_path).unwrap()).unwrap();
    assert_eq!(
        stderr,
        format!(
            "\
            {file_url_html}\n \
            {file_url_svg}\n\
            ",
            file_url_html = html_url,
            file_url_svg = svg_url,
        )
    );

    // STDOUT must keep the <noscript> element, with the image inside it embedded
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(
        stdout,
        "<html><head></head><body><noscript><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"></noscript>\n</body></html>\n"
    );

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn unwrap_noscript_contents() {
    let mut monolith = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let html_path: &Path = Path::new("tests/_data_/noscript/index.html");
    let svg_path: &Path = Path::new("tests/_data_/noscript/image.svg");

    let output = monolith
        .arg("-Mn")
        .arg(html_path.as_os_str())
        .output()
        .unwrap();

    // STDERR must list the document followed by the image it references
    let stderr = String::from_utf8_lossy(&output.stderr);
    let html_url = Url::from_file_path(fs::canonicalize(html_path).unwrap()).unwrap();
    let svg_url = Url::from_file_path(fs::canonicalize(svg_path).unwrap()).unwrap();
    assert_eq!(
        stderr,
        format!(
            "\
            {file_url_html}\n \
            {file_url_svg}\n\
            ",
            file_url_html = html_url,
            file_url_svg = svg_url,
        )
    );

    // STDOUT must show the <noscript> tags turned into comment markers around the image
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert_eq!(
        stdout,
        "<html><head></head><body><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript-->\n</body></html>\n"
    );

    // The process must report exit code 0
    output.assert().code(0);
}
#[test]
fn unwrap_noscript_contents_nested() {
    // Fixture with one NOSCRIPT nested in another, plus the SVG image it references
    let html_path: &Path = Path::new("tests/_data_/noscript/nested.html");
    let svg_path: &Path = Path::new("tests/_data_/noscript/image.svg");

    // Run the binary under test with the `-Mn` flags against the fixture
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    cmd.arg("-Mn");
    cmd.arg(html_path.as_os_str());
    let out = cmd.output().unwrap();

    // STDERR should list the target HTML file followed by the embedded SVG file
    let html_url = Url::from_file_path(fs::canonicalize(html_path).unwrap()).unwrap();
    let svg_url = Url::from_file_path(fs::canonicalize(svg_path).unwrap()).unwrap();
    let expected_stderr = format!(
        "{file_url_html}\n {file_url_svg}\n",
        file_url_html = html_url,
        file_url_svg = svg_url,
    );
    assert_eq!(String::from_utf8_lossy(&out.stderr), expected_stderr);

    // STDOUT should show both NOSCRIPT levels replaced by paired comment markers
    let expected_stdout = "<html><head></head><body><!--noscript--><h1>JS is not active</h1><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript--><!--/noscript-->\n</body></html>\n";
    assert_eq!(String::from_utf8_lossy(&out.stdout), expected_stdout);

    // Exit code should be 0
    out.assert().code(0);
}
#[test]
fn unwrap_noscript_contents_with_script() {
    // Fixture document and the SVG image it references
    let html_path: &Path = Path::new("tests/_data_/noscript/script.html");
    let svg_path: &Path = Path::new("tests/_data_/noscript/image.svg");

    // Run the binary under test with the `-Mn` flags against the fixture
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    cmd.arg("-Mn");
    cmd.arg(html_path.as_os_str());
    let out = cmd.output().unwrap();

    // STDERR should list the target HTML file followed by the embedded SVG file
    let html_url = Url::from_file_path(fs::canonicalize(html_path).unwrap()).unwrap();
    let svg_url = Url::from_file_path(fs::canonicalize(svg_path).unwrap()).unwrap();
    let expected_stderr = format!(
        "{file_url_html}\n {file_url_svg}\n",
        file_url_html = html_url,
        file_url_svg = svg_url,
    );
    assert_eq!(String::from_utf8_lossy(&out.stderr), expected_stderr);

    // STDOUT should hold only the unwrapped NOSCRIPT image between comment markers
    let expected_stdout = "<html>\
        <head></head>\
        <body>\
        <!--noscript-->\
        <img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\">\
        <!--/noscript-->\n\
        </body>\
        </html>\n";
    assert_eq!(String::from_utf8_lossy(&out.stdout), expected_stdout);

    // Exit code should be 0
    out.assert().code(0);
}
#[test]
fn unwrap_noscript_contents_attr_data_url() {
    // The document is supplied inline as a data URL whose NOSCRIPT carries an attribute
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    cmd.args(&[
        "-M",
        "-n",
        "data:text/html,<noscript class=\"\">test</noscript>",
    ]);
    let out = cmd.output().unwrap();

    // STDERR should be empty
    let stderr_text = String::from_utf8_lossy(&out.stderr);
    assert_eq!(stderr_text, "");

    // STDOUT should contain unwrapped contents of NOSCRIPT element,
    // with the attribute kept inside the opening comment marker
    let stdout_text = String::from_utf8_lossy(&out.stdout);
    assert_eq!(
        stdout_text,
        "<html><head><!--noscript class=\"\"-->test<!--/noscript--></head><body></body></html>\n"
    );

    // Exit code should be 0
    out.assert().code(0);
}
}

View file

@ -0,0 +1,239 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use assert_cmd::prelude::*;
    use encoding_rs::Encoding;
    use std::env;
    use std::fs::File;
    use std::path::MAIN_SEPARATOR;
    use std::process::Command;

    /// Relative path of the GB2312 fixture, spelled with the platform's path separator.
    fn fixture_path() -> String {
        format!(
            "tests{s}_data_{s}unusual_encodings{s}gb2312.html",
            s = MAIN_SEPARATOR
        )
    }

    /// The single STDERR line expected when the fixture is loaded from disk:
    /// its `file:` URL (built from the current directory) followed by a newline.
    fn expected_stderr_for_fixture() -> String {
        let cwd = env::current_dir().unwrap();
        // Backslashes are turned into forward slashes so the path matches URL form
        let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
        let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

        format!(
            "{file}{cwd}/tests/_data_/unusual_encodings/gb2312.html\n",
            file = file_url_protocol,
            cwd = cwd_normalized,
        )
    }

    /// Decodes `bytes` as GB2312; if encoding_rs does not know the label,
    /// falls back to a lossy UTF-8 conversion.
    fn decode_gb2312(bytes: &[u8]) -> String {
        match Encoding::for_label(b"gb2312") {
            Some(encoding) => encoding.decode(bytes).0.into_owned(),
            None => String::from_utf8_lossy(bytes).into_owned(),
        }
    }

    #[test]
    fn properly_save_document_with_gb2312() {
        let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
        let out = cmd.arg("-M").arg(fixture_path()).output().unwrap();

        // STDERR should contain only the target file
        assert_eq!(
            String::from_utf8_lossy(&out.stderr),
            expected_stderr_for_fixture()
        );

        // STDOUT should contain original document without any modifications
        assert_eq!(
            decode_gb2312(&out.stdout),
            "<html>\
            <head>\n \
            <meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\">\n \
            <title>线\u{3000}--·-- </title>\n\
            </head>\n\
            <body>\n \
            <h1>线\u{3000}</h1>\n\n\n\
            </body>\
            </html>\n"
        );

        // Exit code should be 0
        out.assert().code(0);
    }

    #[test]
    fn properly_save_document_with_gb2312_from_stdin() {
        // Feed the fixture straight into STDIN. This avoids spawning `cat`
        // (not available on every platform) and avoids waiting on a child
        // process before its piped output has been drained.
        let fixture = File::open(fixture_path()).unwrap();

        let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
        cmd.stdin(fixture);
        let out = cmd.arg("-M").arg("-").output().unwrap();

        // STDERR should be empty
        assert_eq!(String::from_utf8_lossy(&out.stderr), "");

        // STDOUT should contain HTML created out of STDIN
        assert_eq!(
            decode_gb2312(&out.stdout),
            "<html>\
            <head>\n \
            <meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\">\n \
            <title>线\u{3000}--·-- </title>\n\
            </head>\n\
            <body>\n \
            <h1>线\u{3000}</h1>\n\n\n\
            </body>\
            </html>\n"
        );

        // Exit code should be 0
        out.assert().code(0);
    }

    #[test]
    fn properly_save_document_with_gb2312_custom_charset() {
        let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
        let out = cmd
            .arg("-M")
            .arg("-C")
            .arg("utf8")
            .arg(fixture_path())
            .output()
            .unwrap();

        // STDERR should contain only the target file
        assert_eq!(
            String::from_utf8_lossy(&out.stderr),
            expected_stderr_for_fixture()
        );

        // STDOUT should be valid UTF-8 with the META charset rewritten to utf8
        assert_eq!(
            String::from_utf8_lossy(&out.stdout),
            "<html>\
            <head>\n \
            <meta http-equiv=\"content-type\" content=\"text/html;charset=utf8\">\n \
            <title>线\u{3000}--·-- </title>\n\
            </head>\n\
            <body>\n \
            <h1>线\u{3000}</h1>\n\n\n\
            </body>\
            </html>\n"
        );

        // Exit code should be 0
        out.assert().code(0);
    }

    #[test]
    fn properly_save_document_with_gb2312_custom_charset_bad() {
        let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
        let out = cmd
            .arg("-M")
            .arg("-C")
            .arg("utf0")
            .arg(fixture_path())
            .output()
            .unwrap();

        // STDERR should contain error message
        assert_eq!(
            String::from_utf8_lossy(&out.stderr),
            "Unknown encoding: utf0\n"
        );

        // STDOUT should be empty
        assert_eq!(String::from_utf8_lossy(&out.stdout), "");

        // Exit code should be 1
        out.assert().code(1);
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use assert_cmd::prelude::*;
    use std::env;
    use std::path::MAIN_SEPARATOR;
    use std::process::Command;

    #[test]
    fn change_iso88591_to_utf8_to_properly_display_html_entities() {
        // Relative path of the ISO-8859-1 fixture, using the platform's separator
        let fixture = format!(
            "tests{s}_data_{s}unusual_encodings{s}iso-8859-1.html",
            s = MAIN_SEPARATOR
        );

        let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
        cmd.arg("-M");
        cmd.arg(fixture);
        let out = cmd.output().unwrap();

        // STDERR should contain only the target file, printed as a file: URL
        let scheme: &str = if cfg!(windows) { "file:///" } else { "file://" };
        let working_dir = env::current_dir().unwrap();
        let working_dir: String = working_dir.to_str().unwrap().replace("\\", "/");
        let expected_stderr = format!(
            "{file}{cwd}/tests/_data_/unusual_encodings/iso-8859-1.html\n",
            file = scheme,
            cwd = working_dir,
        );
        assert_eq!(String::from_utf8_lossy(&out.stderr), expected_stderr);

        // STDOUT as asserted here still declares charset=iso-8859-1 in its META tag.
        // NOTE(review): the `<EFBFBD>` token below looks like a textual rendering of the
        // UTF-8 bytes of U+FFFD -- confirm against the fixture before relying on it.
        let expected_stdout = "<html>\
            <head>\n \
            <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n \
            </head>\n \
            <body>\n \
            <EFBFBD> Some Company\n \
            \n\n</body>\
            </html>\n";
        assert_eq!(String::from_utf8_lossy(&out.stdout), expected_stdout);

        // Exit code should be 0
        out.assert().code(0);
    }
}

View file

@ -8,35 +8,35 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use reqwest::blocking::Client; use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap; use std::collections::HashMap;
use crate::css; use monolith::css;
use crate::opts::Options; use monolith::opts::Options;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test] #[test]
fn empty_input() { fn empty_input() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let options = Options::default(); let options = Options::default();
assert_eq!(css::embed_css(cache, &client, "", "", &options, 0), ""); assert_eq!(
css::embed_css(cache, &client, &document_url, "", &options, 0),
""
);
} }
#[test] #[test]
fn trim_if_empty() { fn trim_if_empty() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let options = Options::default(); let options = Options::default();
assert_eq!( assert_eq!(
css::embed_css( css::embed_css(cache, &client, &document_url, "\t \t ", &options, 0,),
cache,
&client,
"https://doesntmatter.local/",
"\t \t ",
&options,
0,
),
"" ""
); );
} }
@ -45,6 +45,7 @@ mod passing {
fn style_exclude_unquoted_images() { fn style_exclude_unquoted_images() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default(); let mut options = Options::default();
options.no_images = true; options.no_images = true;
options.silent = true; options.silent = true;
@ -58,23 +59,16 @@ mod passing {
height: calc(100vh - 10pt)"; height: calc(100vh - 10pt)";
assert_eq!( assert_eq!(
css::embed_css( css::embed_css(cache, &client, &document_url, &STYLE, &options, 0,),
cache,
&client,
"https://doesntmatter.local/",
&STYLE,
&options,
0,
),
format!( format!(
"/* border: none;*/\ "/* border: none;*/\
background-image: url('{empty_image}'); \ background-image: url(\"{empty_image}\"); \
list-style: url('{empty_image}');\ list-style: url(\"{empty_image}\");\
width:99.998%; \ width:99.998%; \
margin-top: -20px; \ margin-top: -20px; \
line-height: -1; \ line-height: -1; \
height: calc(100vh - 10pt)", height: calc(100vh - 10pt)",
empty_image = empty_image!() empty_image = EMPTY_IMAGE_DATA_URL
) )
); );
} }
@ -83,6 +77,7 @@ mod passing {
fn style_exclude_single_quoted_images() { fn style_exclude_single_quoted_images() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let mut options = Options::default(); let mut options = Options::default();
options.no_images = true; options.no_images = true;
options.silent = true; options.silent = true;
@ -96,16 +91,16 @@ mod passing {
height: calc(100vh - 10pt)"; height: calc(100vh - 10pt)";
assert_eq!( assert_eq!(
css::embed_css(cache, &client, "", &STYLE, &options, 0), css::embed_css(cache, &client, &document_url, &STYLE, &options, 0),
format!( format!(
"/* border: none;*/\ "/* border: none;*/\
background-image: url('{empty_image}'); \ background-image: url(\"{empty_image}\"); \
list-style: url('{empty_image}');\ list-style: url(\"{empty_image}\");\
width:99.998%; \ width:99.998%; \
margin-top: -20px; \ margin-top: -20px; \
line-height: -1; \ line-height: -1; \
height: calc(100vh - 10pt)", height: calc(100vh - 10pt)",
empty_image = empty_image!() empty_image = EMPTY_IMAGE_DATA_URL
) )
); );
} }
@ -114,19 +109,20 @@ mod passing {
fn style_block() { fn style_block() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let document_url: Url = Url::parse("file:///").unwrap();
let mut options = Options::default(); let mut options = Options::default();
options.silent = true; options.silent = true;
const CSS: &str = "\ const CSS: &str = "\
#id.class-name:not(:nth-child(3n+0)) {\n \ #id.class-name:not(:nth-child(3n+0)) {\n \
// border: none;\n \ // border: none;\n \
background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=');\n\ background-image: url(\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\");\n\
}\n\ }\n\
\n\ \n\
html > body {}"; html > body {}";
assert_eq!( assert_eq!(
css::embed_css(cache, &client, "file:///", &CSS, &options, 0), css::embed_css(cache, &client, &document_url, &CSS, &options, 0),
CSS CSS
); );
} }
@ -135,6 +131,7 @@ mod passing {
fn attribute_selectors() { fn attribute_selectors() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default(); let mut options = Options::default();
options.silent = true; options.silent = true;
@ -143,38 +140,42 @@ mod passing {
/* Attribute exists */ /* Attribute exists */
} }
[data-value='foo'] { [data-value=\"foo\"] {
/* Attribute has this exact value */ /* Attribute has this exact value */
} }
[data-value*='foo'] { [data-value*=\"foo\"] {
/* Attribute value contains this value somewhere in it */ /* Attribute value contains this value somewhere in it */
} }
[data-value~='foo'] { [data-value~=\"foo\"] {
/* Attribute has this value in a space-separated list somewhere */ /* Attribute has this value in a space-separated list somewhere */
} }
[data-value^='foo'] { [data-value^=\"foo\"] {
/* Attribute value starts with this */ /* Attribute value starts with this */
} }
[data-value|='foo'] { [data-value|=\"foo\"] {
/* Attribute value starts with this in a dash-separated list */ /* Attribute value starts with this in a dash-separated list */
} }
[data-value$='foo'] { [data-value$=\"foo\"] {
/* Attribute value ends with this */ /* Attribute value ends with this */
} }
"; ";
assert_eq!(css::embed_css(cache, &client, "", &CSS, &options, 0), CSS); assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0),
CSS
);
} }
#[test] #[test]
fn import_string() { fn import_string() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default(); let mut options = Options::default();
options.silent = true; options.silent = true;
@ -187,20 +188,13 @@ mod passing {
"; ";
assert_eq!( assert_eq!(
css::embed_css( css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
"\ "\
@charset 'UTF-8';\n\ @charset \"UTF-8\";\n\
\n\ \n\
@import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\ @import \"data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9\";\n\
\n\ \n\
@import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\ @import url(\"data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==\")\n\
" "
); );
} }
@ -209,6 +203,7 @@ mod passing {
fn hash_urls() { fn hash_urls() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default(); let mut options = Options::default();
options.silent = true; options.silent = true;
@ -223,14 +218,7 @@ mod passing {
"; ";
assert_eq!( assert_eq!(
css::embed_css( css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
CSS CSS
); );
} }
@ -239,6 +227,7 @@ mod passing {
fn transform_percentages_and_degrees() { fn transform_percentages_and_degrees() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default(); let mut options = Options::default();
options.silent = true; options.silent = true;
@ -251,14 +240,7 @@ mod passing {
"; ";
assert_eq!( assert_eq!(
css::embed_css( css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
CSS CSS
); );
} }
@ -267,6 +249,7 @@ mod passing {
fn unusual_indents() { fn unusual_indents() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default(); let mut options = Options::default();
options.silent = true; options.silent = true;
@ -281,14 +264,7 @@ mod passing {
"; ";
assert_eq!( assert_eq!(
css::embed_css( css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
CSS CSS
); );
} }
@ -297,6 +273,7 @@ mod passing {
fn exclude_fonts() { fn exclude_fonts() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default(); let mut options = Options::default();
options.no_fonts = true; options.no_fonts = true;
options.silent = true; options.silent = true;
@ -320,30 +297,74 @@ mod passing {
font-family: 'My Font' Verdana\n\ font-family: 'My Font' Verdana\n\
}\n\ }\n\
"; ";
const CSS_OUT: &str = " \ const CSS_OUT: &str = " \
\n\ \n\
\n\ \n\
#identifier {\n \ #identifier {\n \
font-family: 'My Font' Arial\n\ font-family: \"My Font\" Arial\n\
}\n\ }\n\
\n \ \n \
\n\ \n\
\n\ \n\
div {\n \ div {\n \
font-family: 'My Font' Verdana\n\ font-family: \"My Font\" Verdana\n\
}\n\ }\n\
"; ";
assert_eq!( assert_eq!(
css::embed_css( css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
cache, CSS_OUT
&client, );
"https://doesntmatter.local/", }
&CSS,
&options, #[test]
0, fn content() {
), let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
#language a[href=\"#translations\"]:before {\n\
content: url(data:,) \"\\A\";\n\
white-space: pre }\n\
";
const CSS_OUT: &str = "\
#language a[href=\"#translations\"]:before {\n\
content: url(\"data:text/plain;base64,\") \"\\a \";\n\
white-space: pre }\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS_OUT
);
}
#[test]
fn ie_css_hack() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
div#p>svg>foreignObject>section:not(\\9) {\n\
width: 300px;\n\
width: 500px\\9;\n\
}\n\
";
const CSS_OUT: &str = "\
div#p>svg>foreignObject>section:not(\\9) {\n\
width: 300px;\n\
width: 500px\t;\n\
}\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS_OUT CSS_OUT
); );
} }

View file

@ -7,7 +7,7 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::css; use monolith::css;
#[test] #[test]
fn backrgound() { fn backrgound() {
@ -64,7 +64,7 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use crate::css; use monolith::css;
#[test] #[test]
fn empty() { fn empty() {

View file

@ -1,3 +1,2 @@
mod embed_css; mod embed_css;
mod enquote;
mod is_image_url_prop; mod is_image_url_prop;

View file

@ -9,12 +9,12 @@
mod passing { mod passing {
use html5ever::serialize::{serialize, SerializeOpts}; use html5ever::serialize::{serialize, SerializeOpts};
use crate::html; use monolith::html;
#[test] #[test]
fn basic() { fn basic() {
let html = "<div>text</div>"; let html = "<div>text</div>";
let mut dom = html::html_to_dom(&html); let mut dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string()); dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());

View file

@ -7,7 +7,7 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::html; use monolith::html;
#[test] #[test]
fn empty_input_sha256() { fn empty_input_sha256() {
@ -51,7 +51,7 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use crate::html; use monolith::html;
#[test] #[test]
fn empty_hash() { fn empty_hash() {

View file

@ -7,8 +7,8 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::html; use monolith::html;
use crate::opts::Options; use monolith::opts::Options;
#[test] #[test]
fn isolated() { fn isolated() {
@ -16,7 +16,10 @@ mod passing {
options.isolate = true; options.isolate = true;
let csp_content = html::compose_csp(&options); let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "default-src 'unsafe-inline' data:;"); assert_eq!(
csp_content,
"default-src 'unsafe-eval' 'unsafe-inline' data:;"
);
} }
#[test] #[test]
@ -75,6 +78,6 @@ mod passing {
options.no_images = true; options.no_images = true;
let csp_content = html::compose_csp(&options); let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;"); assert_eq!(csp_content, "default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
} }
} }

View file

@ -8,14 +8,15 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use chrono::prelude::*; use chrono::prelude::*;
use reqwest::Url;
use crate::html; use monolith::html;
#[test] #[test]
fn http_url() { fn http_url() {
let url = "http://192.168.1.1/"; let url: Url = Url::parse("http://192.168.1.1/").unwrap();
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true); let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::create_metadata_tag(url); let metadata_comment: String = html::create_metadata_tag(&url);
assert_eq!( assert_eq!(
metadata_comment, metadata_comment,
@ -31,9 +32,9 @@ mod passing {
#[test] #[test]
fn file_url() { fn file_url() {
let url = "file:///home/monolith/index.html"; let url: Url = Url::parse("file:///home/monolith/index.html").unwrap();
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true); let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::create_metadata_tag(url); let metadata_comment: String = html::create_metadata_tag(&url);
assert_eq!( assert_eq!(
metadata_comment, metadata_comment,
@ -48,9 +49,9 @@ mod passing {
#[test] #[test]
fn data_url() { fn data_url() {
let url = "data:text/html,Hello%2C%20World!"; let url: Url = Url::parse("data:text/html,Hello%2C%20World!").unwrap();
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true); let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::create_metadata_tag(url); let metadata_comment: String = html::create_metadata_tag(&url);
assert_eq!( assert_eq!(
metadata_comment, metadata_comment,
@ -63,20 +64,3 @@ mod passing {
); );
} }
} }
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use crate::html;
#[test]
fn empty_string() {
assert_eq!(html::create_metadata_tag(""), "");
}
}

View file

@ -8,24 +8,59 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use reqwest::blocking::Client; use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap; use std::collections::HashMap;
use crate::html; use monolith::html;
use crate::opts::Options; use monolith::opts::Options;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test] #[test]
fn replace_with_empty_images() { fn small_medium_large() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let srcset_value = "small.png 1x, large.png 2x"; let srcset_value = "small.png 1x, medium.png 1.5x, large.png 2x";
let mut options = Options::default(); let mut options = Options::default();
options.no_images = true; options.no_images = true;
options.silent = true; options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0); let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!( assert_eq!(
embedded_css, embedded_css,
format!("{} 1x, {} 2x", empty_image!(), empty_image!()), format!(
"{} 1x, {} 1.5x, {} 2x",
EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL,
),
);
}
#[test]
fn small_medium_only_medium_has_scale() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small.png, medium.png 1.5x";
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
embedded_css,
format!("{}, {} 1.5x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
); );
} }
@ -37,11 +72,18 @@ mod passing {
let mut options = Options::default(); let mut options = Options::default();
options.no_images = true; options.no_images = true;
options.silent = true; options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0); let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!( assert_eq!(
embedded_css, embedded_css,
format!("{} 1x, {} 2x", empty_image!(), empty_image!()), format!("{} 1x, {} 2x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
); );
} }
@ -53,15 +95,20 @@ mod passing {
let mut options = Options::default(); let mut options = Options::default();
options.no_images = true; options.no_images = true;
options.silent = true; options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0); let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!( assert_eq!(
embedded_css, embedded_css,
format!( format!(
"{} 1x, {} 2x, {} 3x", "{} 1x, {} 2x, {} 3x",
empty_image!(), EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
empty_image!(),
empty_image!()
), ),
); );
} }
@ -77,10 +124,12 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use reqwest::blocking::Client; use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap; use std::collections::HashMap;
use crate::html; use monolith::html;
use crate::opts::Options; use monolith::opts::Options;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test] #[test]
fn trailing_comma() { fn trailing_comma() {
@ -90,11 +139,18 @@ mod failing {
let mut options = Options::default(); let mut options = Options::default();
options.no_images = true; options.no_images = true;
options.silent = true; options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0); let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!( assert_eq!(
embedded_css, embedded_css,
format!("{} 1x, {} 2x,", empty_image!(), empty_image!()), format!("{} 1x, {} 2x,", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
); );
} }
} }

View file

@ -7,7 +7,7 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::html; use monolith::html;
#[test] #[test]
fn present() { fn present() {
@ -19,11 +19,11 @@ mod passing {
<body> <body>
</body> </body>
</html>"; </html>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!( assert_eq!(
html::get_base_url(&dom.document), html::get_base_url(&dom.document),
Some(str!("https://musicbrainz.org")) Some("https://musicbrainz.org".to_string())
); );
} }
@ -38,11 +38,11 @@ mod passing {
<body> <body>
</body> </body>
</html>"; </html>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!( assert_eq!(
html::get_base_url(&dom.document), html::get_base_url(&dom.document),
Some(str!("https://www.discogs.com/")) Some("https://www.discogs.com/".to_string())
); );
} }
} }
@ -56,7 +56,7 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use crate::html; use monolith::html;
#[test] #[test]
fn absent() { fn absent() {
@ -67,7 +67,7 @@ mod failing {
<body> <body>
</body> </body>
</html>"; </html>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_base_url(&dom.document), None); assert_eq!(html::get_base_url(&dom.document), None);
} }
@ -82,7 +82,7 @@ mod failing {
<body> <body>
</body> </body>
</html>"; </html>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_base_url(&dom.document), None); assert_eq!(html::get_base_url(&dom.document), None);
} }
@ -97,8 +97,8 @@ mod failing {
<body> <body>
</body> </body>
</html>"; </html>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_base_url(&dom.document), Some(str!())); assert_eq!(html::get_base_url(&dom.document), Some("".to_string()));
} }
} }

72
tests/html/get_charset.rs Normal file
View file

@ -0,0 +1,72 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use monolith::html;

    /// Parses `markup` into a DOM (passing an empty string as the second
    /// argument of `html_to_dom`) and returns what `get_charset` reports for it.
    fn detect_charset(markup: &str) -> Option<String> {
        let dom = html::html_to_dom(&markup.as_bytes().to_vec(), String::new());
        html::get_charset(&dom.document)
    }

    #[test]
    fn meta_content_type() {
        // Charset declared only through a content-type META tag
        let markup = "<!doctype html>
<html>
<head>
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
</head>
<body>
</body>
</html>";

        assert_eq!(detect_charset(markup), Some("GB2312".to_string()));
    }

    #[test]
    fn meta_charset() {
        // Charset declared only through a META charset attribute
        let markup = "<!doctype html>
<html>
<head>
<meta charset=\"GB2312\" />
</head>
<body>
</body>
</html>";

        assert_eq!(detect_charset(markup), Some("GB2312".to_string()));
    }

    #[test]
    fn multiple_conflicting_meta_charset_first() {
        // Two conflicting declarations; the META charset tag comes first and is expected to win
        let markup = "<!doctype html>
<html>
<head>
<meta charset=\"utf-8\" />
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
</head>
<body>
</body>
</html>";

        assert_eq!(detect_charset(markup), Some("utf-8".to_string()));
    }

    #[test]
    fn multiple_conflicting_meta_content_type_first() {
        // Two conflicting declarations; the content-type META tag comes first and is expected to win
        let markup = "<!doctype html>
<html>
<head>
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
<meta charset=\"utf-8\" />
</head>
<body>
</body>
</html>";

        assert_eq!(detect_charset(markup), Some("GB2312".to_string()));
    }
}

View file

@ -9,12 +9,12 @@
mod passing { mod passing {
use html5ever::rcdom::{Handle, NodeData}; use html5ever::rcdom::{Handle, NodeData};
use crate::html; use monolith::html;
#[test] #[test]
fn div_two_style_attributes() { fn div_two_style_attributes() {
let html = "<!doctype html><html><head></head><body><DIV STYLE=\"color: blue;\" style=\"display: none;\"></div></body></html>"; let html = "<!doctype html><html><head></head><body><DIV STYLE=\"color: blue;\" style=\"display: none;\"></div></body></html>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut count = 0; let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) { fn test_walk(node: &Handle, i: &mut i8) {
@ -35,7 +35,7 @@ mod passing {
} else if node_name == "div" { } else if node_name == "div" {
assert_eq!( assert_eq!(
html::get_node_attr(node, "style"), html::get_node_attr(node, "style"),
Some(str!("color: blue;")) Some("color: blue;".to_string())
); );
} }

View file

@ -9,12 +9,12 @@
mod passing { mod passing {
use html5ever::rcdom::{Handle, NodeData}; use html5ever::rcdom::{Handle, NodeData};
use crate::html; use monolith::html;
#[test] #[test]
fn parent_node_names() { fn parent_node_names() {
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>"; let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut count = 0; let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) { fn test_walk(node: &Handle, i: &mut i8) {

View file

@ -7,12 +7,12 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::html; use monolith::html;
#[test] #[test]
fn icon() { fn icon() {
let html = "<link rel=\"icon\" href=\"\" /><div>text</div>"; let html = "<link rel=\"icon\" href=\"\" /><div>text</div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let res: bool = html::has_favicon(&dom.document); let res: bool = html::has_favicon(&dom.document);
assert!(res); assert!(res);
@ -21,7 +21,7 @@ mod passing {
#[test] #[test]
fn shortcut_icon() { fn shortcut_icon() {
let html = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>"; let html = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let res: bool = html::has_favicon(&dom.document); let res: bool = html::has_favicon(&dom.document);
assert!(res); assert!(res);
@ -37,12 +37,12 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use crate::html; use monolith::html;
#[test] #[test]
fn absent() { fn absent() {
let html = "<div>text</div>"; let html = "<div>text</div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let res: bool = html::has_favicon(&dom.document); let res: bool = html::has_favicon(&dom.document);
assert!(!res); assert!(!res);

View file

@ -7,7 +7,7 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::html; use monolith::html;
#[test] #[test]
fn icon() { fn icon() {
@ -34,7 +34,7 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use crate::html; use monolith::html;
#[test] #[test]
fn mask_icon() { fn mask_icon() {

View file

@ -4,10 +4,11 @@ mod compose_csp;
mod create_metadata_tag; mod create_metadata_tag;
mod embed_srcset; mod embed_srcset;
mod get_base_url; mod get_base_url;
mod get_charset;
mod get_node_attr; mod get_node_attr;
mod get_node_name; mod get_node_name;
mod has_favicon; mod has_favicon;
mod is_icon; mod is_icon;
mod serialize_document;
mod set_node_attr; mod set_node_attr;
mod stringify_document;
mod walk_and_embed_assets; mod walk_and_embed_assets;

View file

@ -7,17 +7,17 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::html; use monolith::html;
use crate::opts::Options; use monolith::opts::Options;
#[test] #[test]
fn div_as_root_element() { fn div_as_root_element() {
let html = "<div><script src=\"some.js\"></script></div>"; let html = "<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let options = Options::default(); let options = Options::default();
assert_eq!( assert_eq!(
html::stringify_document(&dom.document, &options), String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)),
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>" "<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
); );
} }
@ -28,18 +28,19 @@ mod passing {
<link rel=\"something\" href=\"some.css\" />\ <link rel=\"something\" href=\"some.css\" />\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\ <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<div><script src=\"some.js\"></script></div>"; <div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut options = Options::default(); let mut options = Options::default();
options.isolate = true; options.isolate = true;
assert_eq!( assert_eq!(
html::stringify_document( String::from_utf8_lossy(&html::serialize_document(
&dom.document, dom,
"".to_string(),
&options &options
), )),
"<html>\ "<html>\
<head>\ <head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\ <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:;\"></meta>\
<title>Isolated document</title>\ <title>Isolated document</title>\
<link rel=\"something\" href=\"some.css\">\ <link rel=\"something\" href=\"some.css\">\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\ <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
@ -59,12 +60,12 @@ mod passing {
<title>Unstyled document</title>\ <title>Unstyled document</title>\
<link rel=\"stylesheet\" href=\"main.css\"/>\ <link rel=\"stylesheet\" href=\"main.css\"/>\
<div style=\"display: none;\"></div>"; <div style=\"display: none;\"></div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut options = Options::default(); let mut options = Options::default();
options.no_css = true; options.no_css = true;
assert_eq!( assert_eq!(
html::stringify_document(&dom.document, &options), String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)),
"<!DOCTYPE html>\ "<!DOCTYPE html>\
<html>\ <html>\
<head>\ <head>\
@ -83,15 +84,16 @@ mod passing {
<title>Frameless document</title>\ <title>Frameless document</title>\
<link rel=\"something\"/>\ <link rel=\"something\"/>\
<div><script src=\"some.js\"></script></div>"; <div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut options = Options::default(); let mut options = Options::default();
options.no_frames = true; options.no_frames = true;
assert_eq!( assert_eq!(
html::stringify_document( String::from_utf8_lossy(&html::serialize_document(
&dom.document, dom,
"".to_string(),
&options &options
), )),
"<!DOCTYPE html>\ "<!DOCTYPE html>\
<html>\ <html>\
<head>\ <head>\
@ -115,7 +117,7 @@ mod passing {
<img style=\"width: 100%;\" src=\"some.png\" />\ <img style=\"width: 100%;\" src=\"some.png\" />\
<iframe src=\"some.html\"></iframe>\ <iframe src=\"some.html\"></iframe>\
</div>"; </div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut options = Options::default(); let mut options = Options::default();
options.isolate = true; options.isolate = true;
options.no_css = true; options.no_css = true;
@ -125,14 +127,15 @@ mod passing {
options.no_images = true; options.no_images = true;
assert_eq!( assert_eq!(
html::stringify_document( String::from_utf8_lossy(&html::serialize_document(
&dom.document, dom,
"".to_string(),
&options &options
), )),
"<!DOCTYPE html>\ "<!DOCTYPE html>\
<html>\ <html>\
<head>\ <head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;\"></meta>\ <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
<title>no-frame no-css no-js no-image isolated document</title>\ <title>no-frame no-css no-js no-image isolated document</title>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\ <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\ <link rel=\"stylesheet\" href=\"some.css\">\

View file

@ -9,12 +9,12 @@
mod passing { mod passing {
use html5ever::rcdom::{Handle, NodeData}; use html5ever::rcdom::{Handle, NodeData};
use crate::html; use monolith::html;
#[test] #[test]
fn html_lang_and_body_style() { fn html_lang_and_body_style() {
let html = "<!doctype html><html lang=\"en\"><head></head><body></body></html>"; let html = "<!doctype html><html lang=\"en\"><head></head><body></body></html>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut count = 0; let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) { fn test_walk(node: &Handle, i: &mut i8) {
@ -31,23 +31,23 @@ mod passing {
let node_name = name.local.as_ref().to_string(); let node_name = name.local.as_ref().to_string();
if node_name == "html" { if node_name == "html" {
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("en"))); assert_eq!(html::get_node_attr(node, "lang"), Some("en".to_string()));
html::set_node_attr(node, "lang", Some(str!("de"))); html::set_node_attr(node, "lang", Some("de".to_string()));
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("de"))); assert_eq!(html::get_node_attr(node, "lang"), Some("de".to_string()));
html::set_node_attr(node, "lang", None); html::set_node_attr(node, "lang", None);
assert_eq!(html::get_node_attr(node, "lang"), None); assert_eq!(html::get_node_attr(node, "lang"), None);
html::set_node_attr(node, "lang", Some(str!(""))); html::set_node_attr(node, "lang", Some("".to_string()));
assert_eq!(html::get_node_attr(node, "lang"), Some(str!(""))); assert_eq!(html::get_node_attr(node, "lang"), Some("".to_string()));
} else if node_name == "body" { } else if node_name == "body" {
assert_eq!(html::get_node_attr(node, "style"), None); assert_eq!(html::get_node_attr(node, "style"), None);
html::set_node_attr(node, "style", Some(str!("display: none;"))); html::set_node_attr(node, "style", Some("display: none;".to_string()));
assert_eq!( assert_eq!(
html::get_node_attr(node, "style"), html::get_node_attr(node, "style"),
Some(str!("display: none;")) Some("display: none;".to_string())
); );
} }
@ -67,7 +67,7 @@ mod passing {
#[test] #[test]
fn body_background() { fn body_background() {
let html = "<!doctype html><html lang=\"en\"><head></head><body background=\"1\" background=\"2\"></body></html>"; let html = "<!doctype html><html lang=\"en\"><head></head><body background=\"1\" background=\"2\"></body></html>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut count = 0; let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) { fn test_walk(node: &Handle, i: &mut i8) {
@ -84,7 +84,10 @@ mod passing {
let node_name = name.local.as_ref().to_string(); let node_name = name.local.as_ref().to_string();
if node_name == "body" { if node_name == "body" {
assert_eq!(html::get_node_attr(node, "background"), Some(str!("1"))); assert_eq!(
html::get_node_attr(node, "background"),
Some("1".to_string())
);
html::set_node_attr(node, "background", None); html::set_node_attr(node, "background", None);
assert_eq!(html::get_node_attr(node, "background"), None); assert_eq!(html::get_node_attr(node, "background"), None);

View file

@ -0,0 +1,518 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use html5ever::serialize::{serialize, SerializeOpts};
    use reqwest::blocking::Client;
    use std::collections::HashMap;
    use url::Url;

    use monolith::html;
    use monolith::opts::Options;
    use monolith::url::EMPTY_IMAGE_DATA_URL;

    /// Returns default options with `silent` switched on, which every test in
    /// this module uses as its starting point.
    fn silent_options() -> Options {
        let mut options = Options::default();
        options.silent = true;
        options
    }

    /// Parses `html`, runs `html::walk_and_embed_assets` over the document
    /// (fresh cache, fresh client, document URL `http://localhost`, `0` as the
    /// final argument) and returns the serialized result as text.
    fn embed_and_serialize(html: &str, options: &Options) -> String {
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
        let url: Url = Url::parse("http://localhost").unwrap();
        let cache = &mut HashMap::new();
        let client = Client::new();

        html::walk_and_embed_assets(cache, &client, &url, &dom.document, options, 0);

        let mut buf: Vec<u8> = Vec::new();
        serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

        // Decode the bytes as UTF-8 instead of casting every byte to a `char`:
        // a per-byte cast would split multi-byte sequences into unrelated
        // characters as soon as the output contains anything beyond ASCII.
        String::from_utf8_lossy(&buf).into_owned()
    }

    /// Plain markup without assets only gets wrapped into a full document.
    #[test]
    fn basic() {
        let options = silent_options();

        assert_eq!(
            embed_and_serialize("<div><P></P></div>", &options),
            "<html><head></head><body><div><p></p></div></body></html>"
        );
    }

    /// An `<iframe>` with an empty `src` is left in place untouched.
    #[test]
    fn ensure_no_recursive_iframe() {
        let options = silent_options();

        assert_eq!(
            embed_and_serialize("<div><P></P><iframe src=\"\"></iframe></div>", &options),
            "<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
        );
    }

    /// A `<frame>` with an empty `src` is left in place untouched.
    #[test]
    fn ensure_no_recursive_frame() {
        let options = silent_options();

        assert_eq!(
            embed_and_serialize("<frameset><frame src=\"\"></frameset>", &options),
            "<html><head></head><frameset><frame src=\"\"></frameset></html>"
        );
    }

    /// With `no_css`, stylesheet links lose their `href`, `<style>` bodies are
    /// emptied and inline `style` attributes are dropped.
    #[test]
    fn no_css() {
        let html = "\
            <link rel=\"stylesheet\" href=\"main.css\">\
            <link rel=\"alternate stylesheet\" href=\"main.css\">\
            <style>html{background-color: #000;}</style>\
            <div style=\"display: none;\"></div>\
        ";
        let mut options = silent_options();
        options.no_css = true;

        assert_eq!(
            embed_and_serialize(html, &options),
            "\
            <html>\
                <head>\
                    <link rel=\"stylesheet\">\
                    <link rel=\"alternate stylesheet\">\
                    <style></style>\
                </head>\
                <body>\
                    <div></div>\
                </body>\
            </html>\
            "
        );
    }

    /// With `no_images`, the favicon link loses its `href` and `<img>` sources
    /// are replaced by `EMPTY_IMAGE_DATA_URL`.
    #[test]
    fn no_images() {
        let html = "<link rel=\"icon\" href=\"favicon.ico\">\
            <div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
        let mut options = silent_options();
        options.no_images = true;

        assert_eq!(
            embed_and_serialize(html, &options),
            format!(
                "<html>\
                    <head>\
                        <link rel=\"icon\">\
                    </head>\
                    <body>\
                        <div>\
                            <img src=\"{empty_image}\">\
                        </div>\
                    </body>\
                </html>",
                empty_image = EMPTY_IMAGE_DATA_URL
            )
        );
    }

    /// With `no_images`, `background` attributes on `<body>` are removed.
    #[test]
    fn no_body_background_images() {
        let html =
            "<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
        let mut options = silent_options();
        options.no_images = true;

        assert_eq!(
            embed_and_serialize(html, &options),
            "<html><head></head><body></body></html>"
        );
    }

    /// With `no_frames`, a `<frame>` keeps its `src` attribute but with an
    /// empty value.
    #[test]
    fn no_frames() {
        let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
        let mut options = silent_options();
        options.no_frames = true;

        assert_eq!(
            embed_and_serialize(html, &options),
            "\
            <html>\
                <head>\
                </head>\
                <frameset>\
                    <frame src=\"\">\
                </frameset>\
            </html>\
            "
        );
    }

    /// With `no_frames`, an `<iframe>` keeps its `src` attribute but with an
    /// empty value.
    #[test]
    fn no_iframes() {
        let html = "<iframe src=\"http://trackbook.com\"></iframe>";
        let mut options = silent_options();
        options.no_frames = true;

        assert_eq!(
            embed_and_serialize(html, &options),
            "\
            <html>\
                <head></head>\
                <body>\
                    <iframe src=\"\"></iframe>\
                </body>\
            </html>\
            "
        );
    }

    /// With `no_js`, event-handler attributes, script sources and inline
    /// script bodies are all removed.
    #[test]
    fn no_js() {
        let html = "\
            <div onClick=\"void(0)\">\
                <script src=\"http://localhost/assets/some.js\"></script>\
                <script>alert(1)</script>\
            </div>\
        ";
        let mut options = silent_options();
        options.no_js = true;

        assert_eq!(
            embed_and_serialize(html, &options),
            "\
            <html>\
                <head></head>\
                <body>\
                    <div>\
                        <script></script>\
                        <script></script>\
                    </div>\
                </body>\
            </html>\
            "
        );
    }

    /// A `<link>` whose `rel` value is `something` keeps both its `integrity`
    /// and its `href`.
    #[test]
    fn keeps_integrity_for_unfamiliar_links() {
        let html = "<title>Has integrity</title>\
            <link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
        let options = silent_options();

        assert_eq!(
            embed_and_serialize(html, &options),
            "\
            <html>\
                <head>\
                    <title>Has integrity</title>\
                    <link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\">\
                </head>\
                <body></body>\
            </html>\
            "
        );
    }

    /// With `no_css` and `no_js`, stylesheet links and scripts lose their
    /// `integrity` attribute together with `href`/`src`.
    #[test]
    fn discards_integrity_for_known_links_nojs_nocss() {
        let html = "\
            <title>No integrity</title>\
            <link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
            <script integrity=\"\" src=\"some.js\"></script>\
        ";
        let mut options = silent_options();
        options.no_css = true;
        options.no_js = true;

        assert_eq!(
            embed_and_serialize(html, &options),
            "\
            <html>\
                <head>\
                    <title>No integrity</title>\
                    <link rel=\"stylesheet\">\
                    <script></script>\
                </head>\
                <body></body>\
            </html>\
            "
        );
    }

    /// With `no_css` and `no_js`, the script loses `integrity` and `src`, while
    /// the `rel="something"` link is expected to come through unchanged.
    #[test]
    fn discards_integrity_for_embedded_assets() {
        let html = "\
            <title>No integrity</title>\
            <link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\"/>\
            <script integrity=\"sha384-456\" src=\"some.js\"></script>\
        ";
        let mut options = silent_options();
        options.no_css = true;
        options.no_js = true;

        assert_eq!(
            embed_and_serialize(html, &options),
            "\
            <html>\
                <head>\
                    <title>No integrity</title>\
                    <link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\">\
                    <script></script>\
                </head>\
                <body>\
                </body>\
            </html>\
            "
        );
    }

    /// `Refresh` and `Location` metas lose their `http-equiv` attribute while
    /// keeping `content`.
    #[test]
    fn removes_unwanted_meta_tags() {
        let html = "\
            <html>\
                <head>\
                    <meta http-equiv=\"Refresh\" content=\"2\"/>\
                    <meta http-equiv=\"Location\" content=\"https://freebsd.org\"/>\
                </head>\
                <body>\
                </body>\
            </html>\
        ";
        let mut options = silent_options();
        options.no_css = true;
        options.no_frames = true;
        options.no_js = true;
        options.no_images = true;

        assert_eq!(
            embed_and_serialize(html, &options),
            "\
            <html>\
                <head>\
                    <meta content=\"2\">\
                    <meta content=\"https://freebsd.org\">\
                </head>\
                <body>\
                </body>\
            </html>"
        );
    }

    /// Images nested inside `<noscript>` are processed too: with `no_images`
    /// their `src` becomes `EMPTY_IMAGE_DATA_URL`.
    #[test]
    fn processes_noscript_tags() {
        let html = "\
            <html>\
                <body>\
                    <noscript>\
                        <img src=\"image.png\" />\
                    </noscript>\
                </body>\
            </html>";
        let mut options = silent_options();
        options.no_images = true;

        assert_eq!(
            embed_and_serialize(html, &options),
            format!(
                "\
                <html>\
                    <head>\
                    </head>\
                    <body>\
                        <noscript>\
                            <img src=\"{}\">\
                        </noscript>\
                    </body>\
                </html>",
                EMPTY_IMAGE_DATA_URL,
            )
        );
    }

    /// A `<script type="application/json">` block keeps its attributes and its
    /// body verbatim.
    #[test]
    fn preserves_script_type_json() {
        let html = "<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>";
        let options = silent_options();

        assert_eq!(
            embed_and_serialize(html, &options),
            "\
            <html>\
                <head>\
                    <script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>\
                </head>\
                <body>\
                </body>\
            </html>"
        );
    }
}

View file

@ -7,7 +7,7 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::js; use monolith::js;
#[test] #[test]
fn onblur_camelcase() { fn onblur_camelcase() {
@ -34,7 +34,7 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use crate::js; use monolith::js;
#[test] #[test]
fn href() { fn href() {

View file

@ -2,7 +2,7 @@ mod cli;
mod css; mod css;
mod html; mod html;
mod js; mod js;
mod macros; // mod macros;
mod opts; mod opts;
mod url; mod url;
mod utils; mod utils;

View file

@ -7,15 +7,16 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::opts::Options; use monolith::opts::Options;
#[test] #[test]
fn defaults() { fn defaults() {
let options: Options = Options::default(); let options: Options = Options::default();
assert_eq!(options.target, str!());
assert_eq!(options.no_audio, false); assert_eq!(options.no_audio, false);
assert_eq!(options.base_url, None);
assert_eq!(options.no_css, false); assert_eq!(options.no_css, false);
assert_eq!(options.charset, None);
assert_eq!(options.no_frames, false); assert_eq!(options.no_frames, false);
assert_eq!(options.no_fonts, false); assert_eq!(options.no_fonts, false);
assert_eq!(options.no_images, false); assert_eq!(options.no_images, false);
@ -23,10 +24,12 @@ mod passing {
assert_eq!(options.no_js, false); assert_eq!(options.no_js, false);
assert_eq!(options.insecure, false); assert_eq!(options.insecure, false);
assert_eq!(options.no_metadata, false); assert_eq!(options.no_metadata, false);
assert_eq!(options.output, str!()); assert_eq!(options.output, "".to_string());
assert_eq!(options.silent, false); assert_eq!(options.silent, false);
assert_eq!(options.timeout, 0); assert_eq!(options.timeout, 0);
assert_eq!(options.user_agent, ""); assert_eq!(options.user_agent, None);
assert_eq!(options.no_video, false); assert_eq!(options.no_video, false);
assert_eq!(options.target, "".to_string());
} }
} }

View file

@ -7,12 +7,24 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::url; use reqwest::Url;
use monolith::url;
#[test]
fn preserve_original() {
let u: Url = Url::parse("https://somewhere.com/font.eot#iefix").unwrap();
let clean_u: Url = url::clean_url(u.clone());
assert_eq!(clean_u.as_str(), "https://somewhere.com/font.eot");
assert_eq!(u.as_str(), "https://somewhere.com/font.eot#iefix");
}
#[test] #[test]
fn removes_fragment() { fn removes_fragment() {
assert_eq!( assert_eq!(
url::clean_url("https://somewhere.com/font.eot#iefix"), url::clean_url(Url::parse("https://somewhere.com/font.eot#iefix").unwrap()).as_str(),
"https://somewhere.com/font.eot" "https://somewhere.com/font.eot"
); );
} }
@ -20,31 +32,31 @@ mod passing {
#[test] #[test]
fn removes_empty_fragment() { fn removes_empty_fragment() {
assert_eq!( assert_eq!(
url::clean_url("https://somewhere.com/font.eot#"), url::clean_url(Url::parse("https://somewhere.com/font.eot#").unwrap()).as_str(),
"https://somewhere.com/font.eot" "https://somewhere.com/font.eot"
); );
} }
#[test] #[test]
fn removes_empty_query_and_empty_fragment() { fn removes_empty_fragment_and_keeps_empty_query() {
assert_eq!( assert_eq!(
url::clean_url("https://somewhere.com/font.eot?#"), url::clean_url(Url::parse("https://somewhere.com/font.eot?#").unwrap()).as_str(),
"https://somewhere.com/font.eot" "https://somewhere.com/font.eot?"
); );
} }
#[test] #[test]
fn removes_empty_query_amp_and_empty_fragment() { fn removesempty_fragment_and_keeps_empty_query() {
assert_eq!( assert_eq!(
url::clean_url("https://somewhere.com/font.eot?a=b&#"), url::clean_url(Url::parse("https://somewhere.com/font.eot?a=b&#").unwrap()).as_str(),
"https://somewhere.com/font.eot?a=b" "https://somewhere.com/font.eot?a=b&"
); );
} }
#[test] #[test]
fn keeps_credentials() { fn keeps_credentials() {
assert_eq!( assert_eq!(
url::clean_url("https://cookie:monster@gibson.internet/"), url::clean_url(Url::parse("https://cookie:monster@gibson.internet/").unwrap()).as_str(),
"https://cookie:monster@gibson.internet/" "https://cookie:monster@gibson.internet/"
); );
} }

View file

@ -0,0 +1,109 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use reqwest::Url;

    use monolith::url;

    /// Feeds `payload` to `url::create_data_url` together with the given media
    /// type and charset (using `data:,` as the URL argument) and returns the
    /// produced data URL as an owned string.
    fn encode(media_type: &str, charset: &str, payload: &str) -> String {
        let origin: Url = Url::parse("data:,").unwrap();
        let encoded = url::create_data_url(media_type, charset, payload.as_bytes(), &origin);
        encoded.as_str().to_string()
    }

    /// A media type without charset yields `data:<media type>;base64,<payload>`.
    #[test]
    fn encode_string_with_specific_media_type() {
        assert_eq!(
            encode(
                "application/javascript",
                "",
                "var word = 'hello';\nalert(word);\n"
            ),
            "data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
        );
    }

    /// An SVG payload is base64-encoded under the `image/svg+xml` media type.
    #[test]
    fn encode_append_fragment() {
        assert_eq!(
            encode("image/svg+xml", "", "<svg></svg>\n"),
            "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K"
        );
    }

    /// A non-empty `utf8` charset is emitted between media type and `base64`.
    #[test]
    fn encode_string_with_specific_media_type_and_charset() {
        assert_eq!(
            encode(
                "application/javascript",
                "utf8",
                "var word = 'hello';\nalert(word);\n"
            ),
            "data:application/javascript;charset=utf8;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
        );
    }

    /// The `us-ascii` charset is left out of the resulting data URL.
    #[test]
    fn create_data_url_with_us_ascii_charset() {
        assert_eq!(encode("", "us-ascii", ""), "data:;base64,");
    }

    /// The `utf8` charset is kept even when the media type is empty.
    #[test]
    fn create_data_url_with_utf8_charset() {
        assert_eq!(encode("", "utf8", ""), "data:;charset=utf8;base64,");
    }

    /// `text/plain` together with `utf8` appears in full in the data URL.
    #[test]
    fn create_data_url_with_media_type_text_plain_and_utf8_charset() {
        assert_eq!(
            encode("text/plain", "utf8", ""),
            "data:text/plain;charset=utf8;base64,"
        );
    }
}

View file

@ -7,57 +7,67 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::url; use monolith::url;
#[test] #[test]
fn mailto() { fn mailto() {
assert!(url::url_has_protocol( assert!(url::is_url_and_has_protocol(
"mailto:somebody@somewhere.com?subject=hello" "mailto:somebody@somewhere.com?subject=hello"
)); ));
} }
#[test] #[test]
fn tel() { fn tel() {
assert!(url::url_has_protocol("tel:5551234567")); assert!(url::is_url_and_has_protocol("tel:5551234567"));
} }
#[test] #[test]
fn ftp_no_slashes() { fn ftp_no_slashes() {
assert!(url::url_has_protocol("ftp:some-ftp-server.com")); assert!(url::is_url_and_has_protocol("ftp:some-ftp-server.com"));
} }
#[test] #[test]
fn ftp_with_credentials() { fn ftp_with_credentials() {
assert!(url::url_has_protocol( assert!(url::is_url_and_has_protocol(
"ftp://user:password@some-ftp-server.com" "ftp://user:password@some-ftp-server.com"
)); ));
} }
#[test] #[test]
fn javascript() { fn javascript() {
assert!(url::url_has_protocol("javascript:void(0)")); assert!(url::is_url_and_has_protocol("javascript:void(0)"));
} }
#[test] #[test]
fn http() { fn http() {
assert!(url::url_has_protocol("http://news.ycombinator.com")); assert!(url::is_url_and_has_protocol("http://news.ycombinator.com"));
} }
#[test] #[test]
fn https() { fn https() {
assert!(url::url_has_protocol("https://github.com")); assert!(url::is_url_and_has_protocol("https://github.com"));
}
#[test]
fn file() {
assert!(url::is_url_and_has_protocol("file:///tmp/image.png"));
} }
#[test] #[test]
fn mailto_uppercase() { fn mailto_uppercase() {
assert!(url::url_has_protocol( assert!(url::is_url_and_has_protocol(
"MAILTO:somebody@somewhere.com?subject=hello" "MAILTO:somebody@somewhere.com?subject=hello"
)); ));
} }
#[test] #[test]
fn empty_data_url() { fn empty_data_url() {
assert!(url::url_has_protocol("data:text/html,")); assert!(url::is_url_and_has_protocol("data:text/html,"));
}
#[test]
fn empty_data_url_surrounded_by_spaces() {
assert!(url::is_url_and_has_protocol(" data:text/html, "));
} }
} }
@ -70,25 +80,31 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use crate::url; use monolith::url;
#[test] #[test]
fn url_with_no_protocol() { fn url_with_no_protocol() {
assert!(!url::url_has_protocol("//some-hostname.com/some-file.html")); assert_eq!(
url::is_url_and_has_protocol("//some-hostname.com/some-file.html"),
false
);
} }
#[test] #[test]
fn relative_path() { fn relative_path() {
assert!(!url::url_has_protocol("some-hostname.com/some-file.html")); assert_eq!(
url::is_url_and_has_protocol("some-hostname.com/some-file.html"),
false
);
} }
#[test] #[test]
fn relative_to_root_path() { fn relative_to_root_path() {
assert!(!url::url_has_protocol("/some-file.html")); assert_eq!(url::is_url_and_has_protocol("/some-file.html"), false);
} }
#[test] #[test]
fn empty_string() { fn empty_string() {
assert!(!url::url_has_protocol("")); assert_eq!(url::is_url_and_has_protocol(""), false);
} }
} }

5
tests/url/mod.rs Normal file
View file

@ -0,0 +1,5 @@
mod clean_url;
mod create_data_url;
mod is_url_and_has_protocol;
mod parse_data_url;
mod resolve_url;

View file

@ -7,13 +7,16 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::url; use reqwest::Url;
use monolith::url;
#[test] #[test]
fn parse_text_html_base64() { fn parse_text_html_base64() {
let (media_type, data) = url::parse_data_url("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="); let (media_type, charset, data) = url::parse_data_url(&Url::parse("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==").unwrap());
assert_eq!(media_type, "text/html"); assert_eq!(media_type, "text/html");
assert_eq!(charset, "US-ASCII");
assert_eq!( assert_eq!(
String::from_utf8_lossy(&data), String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion" "Work expands so as to fill the time available for its completion"
@ -22,11 +25,12 @@ mod passing {
#[test] #[test]
fn parse_text_html_utf8() { fn parse_text_html_utf8() {
let (media_type, data) = url::parse_data_url( let (media_type, charset, data) = url::parse_data_url(
"data:text/html;utf8,Work expands so as to fill the time available for its completion", &Url::parse("data:text/html;charset=utf8,Work expands so as to fill the time available for its completion").unwrap(),
); );
assert_eq!(media_type, "text/html"); assert_eq!(media_type, "text/html");
assert_eq!(charset, "utf8");
assert_eq!( assert_eq!(
String::from_utf8_lossy(&data), String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion" "Work expands so as to fill the time available for its completion"
@ -35,22 +39,15 @@ mod passing {
#[test] #[test]
fn parse_text_html_plaintext() { fn parse_text_html_plaintext() {
let (media_type, data) = url::parse_data_url( let (media_type, charset, data) = url::parse_data_url(
&Url::parse(
"data:text/html,Work expands so as to fill the time available for its completion", "data:text/html,Work expands so as to fill the time available for its completion",
)
.unwrap(),
); );
assert_eq!(media_type, "text/html"); assert_eq!(media_type, "text/html");
assert_eq!( assert_eq!(charset, "US-ASCII");
String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion"
);
}
#[test]
fn parse_text_html_charset_utf_8_between_two_whitespaces() {
let (media_type, data) = url::parse_data_url(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
assert_eq!(media_type, "text/html");
assert_eq!( assert_eq!(
String::from_utf8_lossy(&data), String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion" "Work expands so as to fill the time available for its completion"
@ -59,25 +56,31 @@ mod passing {
#[test] #[test]
fn parse_text_css_url_encoded() { fn parse_text_css_url_encoded() {
let (media_type, data) = url::parse_data_url("data:text/css,div{background-color:%23000}"); let (media_type, charset, data) =
url::parse_data_url(&Url::parse("data:text/css,div{background-color:%23000}").unwrap());
assert_eq!(media_type, "text/css"); assert_eq!(media_type, "text/css");
assert_eq!(charset, "US-ASCII");
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}"); assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
} }
#[test] #[test]
fn parse_no_media_type_base64() { fn parse_no_media_type_base64() {
let (media_type, data) = url::parse_data_url("data:;base64,dGVzdA=="); let (media_type, charset, data) =
url::parse_data_url(&Url::parse("data:;base64,dGVzdA==").unwrap());
assert_eq!(media_type, ""); assert_eq!(media_type, "text/plain");
assert_eq!(charset, "US-ASCII");
assert_eq!(String::from_utf8_lossy(&data), "test"); assert_eq!(String::from_utf8_lossy(&data), "test");
} }
#[test] #[test]
fn parse_no_media_type_no_encoding() { fn parse_no_media_type_no_encoding() {
let (media_type, data) = url::parse_data_url("data:;,test%20test"); let (media_type, charset, data) =
url::parse_data_url(&Url::parse("data:;,test%20test").unwrap());
assert_eq!(media_type, ""); assert_eq!(media_type, "text/plain");
assert_eq!(charset, "US-ASCII");
assert_eq!(String::from_utf8_lossy(&data), "test test"); assert_eq!(String::from_utf8_lossy(&data), "test test");
} }
} }
@ -91,13 +94,16 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use crate::url; use reqwest::Url;
use monolith::url;
#[test] #[test]
fn just_word_data() { fn empty_data_url() {
let (media_type, data) = url::parse_data_url("data"); let (media_type, charset, data) = url::parse_data_url(&Url::parse("data:,").unwrap());
assert_eq!(media_type, ""); assert_eq!(media_type, "text/plain");
assert_eq!(charset, "US-ASCII");
assert_eq!(String::from_utf8_lossy(&data), ""); assert_eq!(String::from_utf8_lossy(&data), "");
} }
} }

View file

@ -7,26 +7,49 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::url; use reqwest::Url;
use monolith::url;
#[test] #[test]
fn from_https_to_level_up_relative() { fn basic_httsp_relative() {
assert_eq!( assert_eq!(
url::resolve_url("https://www.kernel.org", "../category/signatures.html") url::resolve_url(
.unwrap_or_default(), &Url::parse("https://www.kernel.org").unwrap(),
"https://www.kernel.org/category/signatures.html" "category/signatures.html"
)
.as_str(),
Url::parse("https://www.kernel.org/category/signatures.html")
.unwrap()
.as_str()
); );
} }
#[test] #[test]
fn from_just_filename_to_full_https_url() { fn basic_httsp_absolute() {
assert_eq!( assert_eq!(
url::resolve_url( url::resolve_url(
"saved_page.htm", &Url::parse("https://www.kernel.org").unwrap(),
"https://www.kernel.org/category/signatures.html", "/category/signatures.html"
) )
.unwrap_or_default(), .as_str(),
"https://www.kernel.org/category/signatures.html" Url::parse("https://www.kernel.org/category/signatures.html")
.unwrap()
.as_str()
);
}
#[test]
fn from_https_to_level_up_relative() {
assert_eq!(
url::resolve_url(
&Url::parse("https://www.kernel.org").unwrap(),
"../category/signatures.html"
)
.as_str(),
Url::parse("https://www.kernel.org/category/signatures.html")
.unwrap()
.as_str()
); );
} }
@ -34,10 +57,10 @@ mod passing {
fn from_https_url_to_url_with_no_protocol() { fn from_https_url_to_url_with_no_protocol() {
assert_eq!( assert_eq!(
url::resolve_url( url::resolve_url(
"https://www.kernel.org", &Url::parse("https://www.kernel.org").unwrap(),
"//www.kernel.org/theme/images/logos/tux.png", "//www.kernel.org/theme/images/logos/tux.png",
) )
.unwrap_or_default(), .as_str(),
"https://www.kernel.org/theme/images/logos/tux.png" "https://www.kernel.org/theme/images/logos/tux.png"
); );
} }
@ -46,22 +69,22 @@ mod passing {
fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() { fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() {
assert_eq!( assert_eq!(
url::resolve_url( url::resolve_url(
"https://www.kernel.org", &Url::parse("https://www.kernel.org").unwrap(),
"//another-host.org/theme/images/logos/tux.png", "//another-host.org/theme/images/logos/tux.png",
) )
.unwrap_or_default(), .as_str(),
"https://another-host.org/theme/images/logos/tux.png" "https://another-host.org/theme/images/logos/tux.png"
); );
} }
#[test] #[test]
fn from_https_url_to_relative_root_path() { fn from_https_url_to_absolute_path() {
assert_eq!( assert_eq!(
url::resolve_url( url::resolve_url(
"https://www.kernel.org/category/signatures.html", &Url::parse("https://www.kernel.org/category/signatures.html").unwrap(),
"/theme/images/logos/tux.png", "/theme/images/logos/tux.png",
) )
.unwrap_or_default(), .as_str(),
"https://www.kernel.org/theme/images/logos/tux.png" "https://www.kernel.org/theme/images/logos/tux.png"
); );
} }
@ -70,10 +93,10 @@ mod passing {
fn from_https_to_just_filename() { fn from_https_to_just_filename() {
assert_eq!( assert_eq!(
url::resolve_url( url::resolve_url(
"https://www.w3schools.com/html/html_iframe.asp", &Url::parse("https://www.w3schools.com/html/html_iframe.asp").unwrap(),
"default.asp", "default.asp",
) )
.unwrap_or_default(), .as_str(),
"https://www.w3schools.com/html/default.asp" "https://www.w3schools.com/html/default.asp"
); );
} }
@ -82,10 +105,11 @@ mod passing {
fn from_data_url_to_https() { fn from_data_url_to_https() {
assert_eq!( assert_eq!(
url::resolve_url( url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
.unwrap(),
"https://www.kernel.org/category/signatures.html", "https://www.kernel.org/category/signatures.html",
) )
.unwrap_or_default(), .as_str(),
"https://www.kernel.org/category/signatures.html" "https://www.kernel.org/category/signatures.html"
); );
} }
@ -94,10 +118,11 @@ mod passing {
fn from_data_url_to_data_url() { fn from_data_url_to_data_url() {
assert_eq!( assert_eq!(
url::resolve_url( url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
.unwrap(),
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K", "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
) )
.unwrap_or_default(), .as_str(),
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K" "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
); );
} }
@ -106,10 +131,10 @@ mod passing {
fn from_file_url_to_relative_path() { fn from_file_url_to_relative_path() {
assert_eq!( assert_eq!(
url::resolve_url( url::resolve_url(
"file:///home/user/Websites/my-website/index.html", &Url::parse("file:///home/user/Websites/my-website/index.html").unwrap(),
"assets/images/logo.png", "assets/images/logo.png",
) )
.unwrap_or_default(), .as_str(),
"file:///home/user/Websites/my-website/assets/images/logo.png" "file:///home/user/Websites/my-website/assets/images/logo.png"
); );
} }
@ -118,10 +143,10 @@ mod passing {
fn from_file_url_to_relative_path_with_backslashes() { fn from_file_url_to_relative_path_with_backslashes() {
assert_eq!( assert_eq!(
url::resolve_url( url::resolve_url(
"file:\\\\\\home\\user\\Websites\\my-website\\index.html", &Url::parse("file:\\\\\\home\\user\\Websites\\my-website\\index.html").unwrap(),
"assets\\images\\logo.png", "assets\\images\\logo.png",
) )
.unwrap_or_default(), .as_str(),
"file:///home/user/Websites/my-website/assets/images/logo.png" "file:///home/user/Websites/my-website/assets/images/logo.png"
); );
} }
@ -130,10 +155,11 @@ mod passing {
fn from_data_url_to_file_url() { fn from_data_url_to_file_url() {
assert_eq!( assert_eq!(
url::resolve_url( url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
.unwrap(),
"file:///etc/passwd", "file:///etc/passwd",
) )
.unwrap_or_default(), .as_str(),
"file:///etc/passwd" "file:///etc/passwd"
); );
} }
@ -142,30 +168,35 @@ mod passing {
fn preserve_fragment() { fn preserve_fragment() {
assert_eq!( assert_eq!(
url::resolve_url( url::resolve_url(
"http://doesnt-matter.local/", &Url::parse("http://doesnt-matter.local/").unwrap(),
"css/fonts/fontmarvelous.svg#fontmarvelous", "css/fonts/fontmarvelous.svg#fontmarvelous",
) )
.unwrap_or_default(), .as_str(),
"http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous" "http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
); );
} }
#[test] #[test]
fn resolve_from_file_url_to_file_url() { fn resolve_from_file_url_to_file_url() {
if cfg!(windows) {
assert_eq!( assert_eq!(
if cfg!(windows) { url::resolve_url(
url::resolve_url("file:///c:/index.html", "file:///c:/image.png") &Url::parse("file:///c:/index.html").unwrap(),
.unwrap_or_default() "file:///c:/image.png"
} else { )
url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png") .as_str(),
.unwrap_or_default()
},
if cfg!(windows) {
"file:///c:/image.png" "file:///c:/image.png"
} else {
"file:///tmp/image.png"
}
); );
} else {
assert_eq!(
url::resolve_url(
&Url::parse("file:///tmp/index.html").unwrap(),
"file:///tmp/image.png"
)
.as_str(),
"file:///tmp/image.png"
);
}
} }
} }
@ -178,17 +209,20 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use crate::url; use reqwest::Url;
use monolith::url;
#[test] #[test]
fn from_data_url_to_url_with_no_protocol() { fn from_data_url_to_url_with_no_protocol() {
assert_eq!( assert_eq!(
url::resolve_url( url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
.unwrap(),
"//www.w3schools.com/html/html_iframe.asp", "//www.w3schools.com/html/html_iframe.asp",
) )
.unwrap_or_default(), .as_str(),
"" "data:,"
); );
} }
} }

View file

@ -7,131 +7,171 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::utils; use reqwest::Url;
use monolith::utils;
#[test] #[test]
fn image_gif87() { fn image_gif87() {
assert_eq!(utils::detect_media_type(b"GIF87a", ""), "image/gif"); let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(utils::detect_media_type(b"GIF87a", &dummy_url), "image/gif");
} }
#[test] #[test]
fn image_gif89() { fn image_gif89() {
assert_eq!(utils::detect_media_type(b"GIF89a", ""), "image/gif"); let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(utils::detect_media_type(b"GIF89a", &dummy_url), "image/gif");
} }
#[test] #[test]
fn image_jpeg() { fn image_jpeg() {
assert_eq!(utils::detect_media_type(b"\xFF\xD8\xFF", ""), "image/jpeg"); let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"\xFF\xD8\xFF", &dummy_url),
"image/jpeg"
);
} }
#[test] #[test]
fn image_png() { fn image_png() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!( assert_eq!(
utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", ""), utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", &dummy_url),
"image/png" "image/png"
); );
} }
#[test] #[test]
fn image_svg() { fn image_svg() {
assert_eq!(utils::detect_media_type(b"<svg ", ""), "image/svg+xml"); let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"<svg ", &dummy_url),
"image/svg+xml"
);
} }
#[test] #[test]
fn image_webp() { fn image_webp() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!( assert_eq!(
utils::detect_media_type(b"RIFF....WEBPVP8 ", ""), utils::detect_media_type(b"RIFF....WEBPVP8 ", &dummy_url),
"image/webp" "image/webp"
); );
} }
#[test] #[test]
fn image_icon() { fn image_icon() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!( assert_eq!(
utils::detect_media_type(b"\x00\x00\x01\x00", ""), utils::detect_media_type(b"\x00\x00\x01\x00", &dummy_url),
"image/x-icon" "image/x-icon"
); );
} }
#[test] #[test]
fn image_svg_filename() { fn image_svg_filename() {
let file_url: Url = Url::parse("file:///tmp/local-file.svg").unwrap();
assert_eq!( assert_eq!(
utils::detect_media_type(b"<?xml ", "local-file.svg"), utils::detect_media_type(b"<?xml ", &file_url),
"image/svg+xml" "image/svg+xml"
); );
} }
#[test] #[test]
fn image_svg_url_uppercase() { fn image_svg_url_uppercase() {
assert_eq!( let https_url: Url = Url::parse("https://some-site.com/images/local-file.SVG").unwrap();
utils::detect_media_type(b"", "https://some-site.com/images/local-file.SVG"), assert_eq!(utils::detect_media_type(b"", &https_url), "image/svg+xml");
"image/svg+xml"
);
} }
#[test] #[test]
fn audio_mpeg() { fn audio_mpeg() {
assert_eq!(utils::detect_media_type(b"ID3", ""), "audio/mpeg"); let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(utils::detect_media_type(b"ID3", &dummy_url), "audio/mpeg");
} }
#[test] #[test]
fn audio_mpeg_2() { fn audio_mpeg_2() {
assert_eq!(utils::detect_media_type(b"\xFF\x0E", ""), "audio/mpeg"); let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"\xFF\x0E", &dummy_url),
"audio/mpeg"
);
} }
#[test] #[test]
fn audio_mpeg_3() { fn audio_mpeg_3() {
assert_eq!(utils::detect_media_type(b"\xFF\x0F", ""), "audio/mpeg"); let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"\xFF\x0F", &dummy_url),
"audio/mpeg"
);
} }
#[test] #[test]
fn audio_ogg() { fn audio_ogg() {
assert_eq!(utils::detect_media_type(b"OggS", ""), "audio/ogg"); let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(utils::detect_media_type(b"OggS", &dummy_url), "audio/ogg");
} }
#[test] #[test]
fn audio_wav() { fn audio_wav() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!( assert_eq!(
utils::detect_media_type(b"RIFF....WAVEfmt ", ""), utils::detect_media_type(b"RIFF....WAVEfmt ", &dummy_url),
"audio/wav" "audio/wav"
); );
} }
#[test] #[test]
fn audio_flac() { fn audio_flac() {
assert_eq!(utils::detect_media_type(b"fLaC", ""), "audio/x-flac"); let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"fLaC", &dummy_url),
"audio/x-flac"
);
} }
#[test] #[test]
fn video_avi() { fn video_avi() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!( assert_eq!(
utils::detect_media_type(b"RIFF....AVI LIST", ""), utils::detect_media_type(b"RIFF....AVI LIST", &dummy_url),
"video/avi" "video/avi"
); );
} }
#[test] #[test]
fn video_mp4() { fn video_mp4() {
assert_eq!(utils::detect_media_type(b"....ftyp", ""), "video/mp4"); let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"....ftyp", &dummy_url),
"video/mp4"
);
} }
#[test] #[test]
fn video_mpeg() { fn video_mpeg() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!( assert_eq!(
utils::detect_media_type(b"\x00\x00\x01\x0B", ""), utils::detect_media_type(b"\x00\x00\x01\x0B", &dummy_url),
"video/mpeg" "video/mpeg"
); );
} }
#[test] #[test]
fn video_quicktime() { fn video_quicktime() {
assert_eq!(utils::detect_media_type(b"....moov", ""), "video/quicktime"); let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"....moov", &dummy_url),
"video/quicktime"
);
} }
#[test] #[test]
fn video_webm() { fn video_webm() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!( assert_eq!(
utils::detect_media_type(b"\x1A\x45\xDF\xA3", ""), utils::detect_media_type(b"\x1A\x45\xDF\xA3", &dummy_url),
"video/webm" "video/webm"
); );
} }
@ -146,10 +186,16 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use crate::utils; use reqwest::Url;
use monolith::utils;
#[test] #[test]
fn unknown_media_type() { fn unknown_media_type() {
assert_eq!(utils::detect_media_type(b"abcdef0123456789", ""), ""); let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"abcdef0123456789", &dummy_url),
""
);
} }
} }

View file

@ -0,0 +1,154 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use monolith::utils;

    /// A sub-domain is within the same sub-domain written with a leading dot.
    #[test]
    fn sub_domain_is_within_dotted_sub_domain() {
        let domain = "news.ycombinator.com";
        let pattern = ".news.ycombinator.com";

        assert!(utils::domain_is_within_domain(domain, pattern));
    }

    /// A bare domain is within its own leading-dot form.
    #[test]
    fn domain_is_within_dotted_domain() {
        let domain = "ycombinator.com";
        let pattern = ".ycombinator.com";

        assert!(utils::domain_is_within_domain(domain, pattern));
    }

    /// A sub-domain is within the leading-dot form of its parent domain.
    #[test]
    fn sub_domain_is_within_dotted_domain() {
        let domain = "news.ycombinator.com";
        let pattern = ".ycombinator.com";

        assert!(utils::domain_is_within_domain(domain, pattern));
    }

    /// A sub-domain is within the leading-dot form of its top-level domain.
    #[test]
    fn sub_domain_is_within_dotted_top_level_domain() {
        let domain = "news.ycombinator.com";
        let pattern = ".com";

        assert!(utils::domain_is_within_domain(domain, pattern));
    }

    /// Identical domain strings are within one another.
    #[test]
    fn domain_is_within_itself() {
        let domain = "ycombinator.com";
        let pattern = "ycombinator.com";

        assert!(utils::domain_is_within_domain(domain, pattern));
    }

    /// A trailing dot on the checked domain does not prevent an exact match.
    #[test]
    fn domain_with_trailing_dot_is_within_itself() {
        let domain = "ycombinator.com.";
        let pattern = "ycombinator.com";

        assert!(utils::domain_is_within_domain(domain, pattern));
    }

    /// A single-dot pattern accepts a domain that ends with a dot.
    #[test]
    fn domain_with_trailing_dot_is_within_single_dot() {
        let domain = "ycombinator.com.";
        let pattern = ".";

        assert!(utils::domain_is_within_domain(domain, pattern));
    }

    /// A single-dot pattern accepts a regular domain.
    #[test]
    fn domain_matches_single_dot() {
        let domain = "ycombinator.com";
        let pattern = ".";

        assert!(utils::domain_is_within_domain(domain, pattern));
    }

    /// A leading-dot domain is within the identical leading-dot pattern.
    #[test]
    fn dotted_domain_must_be_within_dotted_domain() {
        let domain = ".ycombinator.com";
        let pattern = ".ycombinator.com";

        assert!(utils::domain_is_within_domain(domain, pattern));
    }

    /// An empty domain is accepted by the single-dot pattern.
    #[test]
    fn empty_is_within_dot() {
        let domain = "";
        let pattern = ".";

        assert!(utils::domain_is_within_domain(domain, pattern));
    }

    /// A single dot is within a single dot.
    #[test]
    fn both_dots() {
        let domain = ".";
        let pattern = ".";

        assert!(utils::domain_is_within_domain(domain, pattern));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use monolith::utils;

    /// Without a leading dot, a parent-domain pattern does not cover its sub-domains.
    #[test]
    fn sub_domain_must_not_be_within_domain() {
        let domain = "news.ycombinator.com";
        let pattern = "ycombinator.com";

        assert!(!utils::domain_is_within_domain(domain, pattern));
    }

    /// A bare top-level domain pattern does not cover a domain beneath it.
    #[test]
    fn domain_must_not_be_within_top_level_domain() {
        let domain = "ycombinator.com";
        let pattern = "com";

        assert!(!utils::domain_is_within_domain(domain, pattern));
    }

    /// Unrelated domains are never within one another.
    #[test]
    fn different_domains_must_not_be_within_one_another() {
        let domain = "news.ycombinator.com";
        let pattern = "kernel.org";

        assert!(!utils::domain_is_within_domain(domain, pattern));
    }

    /// A sub-domain is not within a different bare top-level domain.
    #[test]
    fn sub_domain_is_not_within_wrong_top_level_domain() {
        let domain = "news.ycombinator.com";
        let pattern = "org";

        assert!(!utils::domain_is_within_domain(domain, pattern));
    }

    /// A leading-dot domain is not within the same domain written without the dot.
    #[test]
    fn dotted_domain_is_not_within_domain() {
        let domain = ".ycombinator.com";
        let pattern = "ycombinator.com";

        assert!(!utils::domain_is_within_domain(domain, pattern));
    }

    /// A leading-dot pattern does not cover a domain from a different site.
    #[test]
    fn different_domain_is_not_within_dotted_domain() {
        let domain = "www.doodleoptimize.com";
        let pattern = ".ycombinator.com";

        assert!(!utils::domain_is_within_domain(domain, pattern));
    }

    /// An empty pattern covers no domain.
    #[test]
    fn no_domain_can_be_within_empty_domain() {
        let domain = "ycombinator.com";
        let pattern = "";

        assert!(!utils::domain_is_within_domain(domain, pattern));
    }

    /// Two empty strings are not within one another.
    #[test]
    fn both_can_not_be_empty() {
        let domain = "";
        let pattern = "";

        assert!(!utils::domain_is_within_domain(domain, pattern));
    }
}

View file

@ -7,7 +7,7 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::utils; use monolith::utils;
#[test] #[test]
fn zero() { fn zero() {
@ -28,4 +28,9 @@ mod passing {
fn three() { fn three() {
assert_eq!(utils::indent(3), " "); assert_eq!(utils::indent(3), " ");
} }
#[test]
fn four() {
assert_eq!(utils::indent(4), " ");
}
} }

View file

@ -1,3 +1,5 @@
mod detect_media_type; mod detect_media_type;
mod domain_is_within_domain;
mod indent; mod indent;
mod parse_content_type;
mod retrieve_asset; mod retrieve_asset;

View file

@ -0,0 +1,86 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use monolith::utils;

    /// Feeds `input` to `utils::parse_content_type` and checks the three
    /// elements of the returned tuple, in order: media type, charset, and the
    /// base64 flag.
    #[track_caller]
    fn expect_parsed(input: &str, media_type: &str, charset: &str, is_base64: bool) {
        let parsed = utils::parse_content_type(input);

        assert_eq!(parsed.0, media_type);
        assert_eq!(parsed.1, charset);
        assert_eq!(parsed.2, is_base64);
    }

    /// Media type and charset are both extracted from a plain header value.
    #[test]
    fn text_plain_utf8() {
        expect_parsed("text/plain;charset=utf8", "text/plain", "utf8", false);
    }

    /// Whitespace around the media type and the charset parameter is ignored.
    #[test]
    fn text_plain_utf8_spaces() {
        expect_parsed(" text/plain ; charset=utf8 ", "text/plain", "utf8", false);
    }

    /// Empty input falls back to `text/plain` with the `US-ASCII` charset.
    #[test]
    fn empty() {
        expect_parsed("", "text/plain", "US-ASCII", false);
    }

    /// A lone `;base64` sets the flag and keeps the fallback media type and charset.
    #[test]
    fn base64() {
        expect_parsed(";base64", "text/plain", "US-ASCII", true);
    }

    /// A media type followed by `;base64` sets the flag and keeps the fallback charset.
    #[test]
    fn text_html_base64() {
        expect_parsed("text/html;base64", "text/html", "US-ASCII", true);
    }

    /// A bare media type keeps the fallback charset and leaves the flag unset.
    #[test]
    fn only_media_type() {
        expect_parsed("text/html", "text/html", "US-ASCII", false);
    }

    /// A trailing separator after the media type changes nothing.
    // NOTE(review): the test name says "colon", but the input ends with a semicolon.
    #[test]
    fn only_media_type_colon() {
        expect_parsed("text/html;", "text/html", "US-ASCII", false);
    }

    /// The charset is picked up when it precedes an unrelated parameter.
    #[test]
    fn media_type_gb2312_filename() {
        expect_parsed(
            "text/html;charset=GB2312;filename=index.html",
            "text/html",
            "GB2312",
            false,
        );
    }

    /// The charset is picked up when it follows an unrelated parameter.
    #[test]
    fn media_type_filename_gb2312() {
        expect_parsed(
            "text/html;filename=index.html;charset=GB2312",
            "text/html",
            "GB2312",
            false,
        );
    }
}

View file

@ -8,12 +8,13 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use reqwest::blocking::Client; use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap; use std::collections::HashMap;
use std::env; use std::env;
use crate::opts::Options; use monolith::opts::Options;
use crate::url; use monolith::url;
use crate::utils; use monolith::utils;
#[test] #[test]
fn read_data_url() { fn read_data_url() {
@ -25,24 +26,25 @@ mod passing {
// If both source and target are data URLs, // If both source and target are data URLs,
// ensure the result contains target data URL // ensure the result contains target data URL
let (data, final_url, media_type) = utils::retrieve_asset( let (data, final_url, media_type, charset) = utils::retrieve_asset(
cache, cache,
&client, &client,
"data:text/html;base64,c291cmNl", &Url::parse("data:text/html;base64,c291cmNl").unwrap(),
"data:text/html;base64,dGFyZ2V0", &Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(),
&options, &options,
0, 0,
) )
.unwrap(); .unwrap();
assert_eq!(&media_type, "text/html");
assert_eq!(&charset, "US-ASCII");
assert_eq!( assert_eq!(
url::data_to_data_url(&media_type, &data, &final_url), url::create_data_url(&media_type, &charset, &data, &final_url),
url::data_to_data_url("text/html", "target".as_bytes(), "") Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(),
); );
assert_eq!( assert_eq!(
final_url, final_url,
url::data_to_data_url("text/html", "target".as_bytes(), "") Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(),
); );
assert_eq!(&media_type, "text/html");
} }
#[test] #[test]
@ -57,31 +59,36 @@ mod passing {
// Inclusion of local assets from local sources should be allowed // Inclusion of local assets from local sources should be allowed
let cwd = env::current_dir().unwrap(); let cwd = env::current_dir().unwrap();
let (data, final_url, _media_type) = utils::retrieve_asset( let (data, final_url, media_type, charset) = utils::retrieve_asset(
cache, cache,
&client, &client,
&format!( &Url::parse(&format!(
"{file}{cwd}/src/tests/data/basic/local-file.html", "{file}{cwd}/tests/_data_/basic/local-file.html",
file = file_url_protocol, file = file_url_protocol,
cwd = cwd.to_str().unwrap() cwd = cwd.to_str().unwrap()
), ))
&format!( .unwrap(),
"{file}{cwd}/src/tests/data/basic/local-script.js", &Url::parse(&format!(
"{file}{cwd}/tests/_data_/basic/local-script.js",
file = file_url_protocol, file = file_url_protocol,
cwd = cwd.to_str().unwrap() cwd = cwd.to_str().unwrap()
), ))
.unwrap(),
&options, &options,
0, 0,
) )
.unwrap(); .unwrap();
assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="); assert_eq!(&media_type, "application/javascript");
assert_eq!(&charset, "");
assert_eq!(url::create_data_url(&media_type, &charset, &data, &final_url), Url::parse("data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==").unwrap());
assert_eq!( assert_eq!(
&final_url, final_url,
&format!( Url::parse(&format!(
"{file}{cwd}/src/tests/data/basic/local-script.js", "{file}{cwd}/tests/_data_/basic/local-script.js",
file = file_url_protocol, file = file_url_protocol,
cwd = cwd.to_str().unwrap() cwd = cwd.to_str().unwrap()
) ))
.unwrap()
); );
} }
} }
@ -96,10 +103,11 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use reqwest::blocking::Client; use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap; use std::collections::HashMap;
use crate::opts::Options; use monolith::opts::Options;
use crate::utils; use monolith::utils;
#[test] #[test]
fn read_local_file_with_data_url_parent() { fn read_local_file_with_data_url_parent() {
@ -113,8 +121,8 @@ mod failing {
match utils::retrieve_asset( match utils::retrieve_asset(
cache, cache,
&client, &client,
"data:text/html;base64,SoUrCe", &Url::parse("data:text/html;base64,SoUrCe").unwrap(),
"file:///etc/passwd", &Url::parse("file:///etc/passwd").unwrap(),
&options, &options,
0, 0,
) { ) {
@ -139,8 +147,8 @@ mod failing {
match utils::retrieve_asset( match utils::retrieve_asset(
cache, cache,
&client, &client,
"https://kernel.org/", &Url::parse("https://kernel.org/").unwrap(),
"file:///etc/passwd", &Url::parse("file:///etc/passwd").unwrap(),
&options, &options,
0, 0,
) { ) {