Compare commits

..

No commits in common. "master" and "v2.1.2" have entirely different histories.

108 changed files with 2639 additions and 9146 deletions

View File

@ -1 +0,0 @@
docs/arch

130
.appveyor.yml Normal file
View File

@ -0,0 +1,130 @@
# Appveyor configuration template for Rust using rustup for Rust installation
# https://github.com/starkat99/appveyor-rust
## Operating System (VM environment) ##
# Rust needs at least Visual Studio 2013 Appveyor OS for MSVC targets.
os: Visual Studio 2015
## Build Matrix ##
# This configuration will setup a build for each channel & target combination (12 windows
# combinations in all).
#
# There are 3 channels: stable, beta, and nightly.
#
# Alternatively, the full version may be specified for the channel to build using that specific
# version (e.g. channel: 1.5.0)
#
# The values for target are the set of windows Rust build targets. Each value is of the form
#
# ARCH-pc-windows-TOOLCHAIN
#
# Where ARCH is the target architecture, either x86_64 or i686, and TOOLCHAIN is the linker
# toolchain to use, either msvc or gnu. See https://www.rust-lang.org/downloads.html#win-foot for
# a description of the toolchain differences.
# See https://github.com/rust-lang-nursery/rustup.rs/#toolchain-specification for description of
# toolchains and host triples.
#
# Comment out channel/target combos you do not wish to build in CI.
#
# You may use the `cargoflags` and `RUSTFLAGS` variables to set additional flags for cargo commands
# and rustc, respectively. For instance, you can uncomment the cargoflags lines in the nightly
# channels to enable unstable features when building for nightly. Or you could add additional
# matrix entries to test different combinations of features.
environment:
matrix:
### MSVC Toolchains ###
# Stable 64-bit MSVC
- channel: stable
target: x86_64-pc-windows-msvc
# Stable 32-bit MSVC
- channel: stable
target: i686-pc-windows-msvc
# Beta 64-bit MSVC
- channel: beta
target: x86_64-pc-windows-msvc
# Beta 32-bit MSVC
- channel: beta
target: i686-pc-windows-msvc
# Nightly 64-bit MSVC
- channel: nightly
target: x86_64-pc-windows-msvc
#cargoflags: --features "unstable"
# Nightly 32-bit MSVC
- channel: nightly
target: i686-pc-windows-msvc
#cargoflags: --features "unstable"
### GNU Toolchains ###
# Stable 64-bit GNU
- channel: stable
target: x86_64-pc-windows-gnu
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
# Stable 32-bit GNU
- channel: stable
target: i686-pc-windows-gnu
MINGW_PATH: 'C:\MinGW\bin'
# Beta 64-bit GNU
- channel: beta
target: x86_64-pc-windows-gnu
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
# Beta 32-bit GNU
- channel: beta
target: i686-pc-windows-gnu
MINGW_PATH: 'C:\MinGW\bin'
# Nightly 64-bit GNU
- channel: nightly
target: x86_64-pc-windows-gnu
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
#cargoflags: --features "unstable"
# Nightly 32-bit GNU
- channel: nightly
target: i686-pc-windows-gnu
MINGW_PATH: 'C:\MinGW\bin'
#cargoflags: --features "unstable"
### Allowed failures ###
# See Appveyor documentation for specific details. In short, place any channel or targets you wish
# to allow build failures on (usually nightly at least is a wise choice). This will prevent a build
# or test failure in the matching channels/targets from failing the entire build.
matrix:
allow_failures:
- channel: nightly
- channel: beta
# If you only care about stable channel build failures, uncomment the following line:
#- channel: beta
## Install Script ##
# This is the most important part of the Appveyor configuration. This installs the version of Rust
# specified by the 'channel' and 'target' environment variables from the build matrix. This uses
# rustup to install Rust.
#
# For simple configurations, instead of using the build matrix, you can simply set the
# default-toolchain and default-host manually here.
install:
- appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
- rustup-init -yv --default-toolchain %channel% --default-host %target%
- set PATH=%PATH%;%USERPROFILE%\.cargo\bin
- if defined MINGW_PATH set PATH=%PATH%;%MINGW_PATH%
- rustc -vV
- cargo -vV
- rustup component add rustfmt
## Build Script ##
# 'cargo test' takes care of building for us, so disable Appveyor's build stage. This prevents
# the "directory does not contain a project or solution file" error.
build: false
# Uses 'cargo test' to run tests and build. Alternatively, the project may call compiled programs
#directly or perform other testing commands. Rust will automatically be placed in the PATH
# environment variable.
test_script:
- cargo test --verbose %cargoflags%

View File

@ -1,35 +0,0 @@
name: GNU/Linux
on:
push:
branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'docs/'
- 'snap/'
- '.adr-dir'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs:
build:
strategy:
matrix:
os:
- ubuntu-latest
rust:
- stable
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose

View File

@ -1,35 +0,0 @@
name: macOS
on:
push:
branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'docs/'
- 'snap/'
- '.adr-dir'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs:
build:
strategy:
matrix:
os:
- macos-latest
rust:
- stable
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose

View File

@ -1,35 +0,0 @@
name: Windows
on:
push:
branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'docs/'
- 'snap/'
- '.adr-dir'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs:
build:
strategy:
matrix:
os:
- windows-latest
rust:
- stable
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose

View File

@ -1,108 +0,0 @@
# CD GitHub Actions workflow for monolith
name: CD
on:
release:
types:
- created
jobs:
windows:
runs-on: windows-2019
steps:
- run: git config --global core.autocrlf false
- name: Checkout the repository
uses: actions/checkout@v2
- name: Build the executable
run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0
with:
name: monolith.exe
path: target\release\monolith.exe
repo-token: ${{ secrets.GITHUB_TOKEN }}
gnu_linux_armhf:
runs-on: ubuntu-18.04
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Prepare cross-platform environment
run: |
sudo mkdir /cross-build
sudo touch /etc/apt/sources.list.d/armhf.list
echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/armhf.list
sudo apt-get update
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
sudo apt-get download libssl1.1:armhf libssl-dev:armhf
sudo dpkg -x libssl1.1*.deb /cross-build
sudo dpkg -x libssl-dev*.deb /cross-build
rustup target add arm-unknown-linux-gnueabihf
echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV
echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/arm-linux-gnueabihf" >> $GITHUB_ENV
echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
echo "RUSTFLAGS=-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build/usr/lib/arm-linux-gnueabihf -L/cross-build/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
- name: Build the executable
run: cargo build --release --target=arm-unknown-linux-gnueabihf
- name: Attach artifact to the release
uses: Shopify/upload-to-release@1.0.0
with:
name: monolith-gnu-linux-armhf
path: target/arm-unknown-linux-gnueabihf/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }}
gnu_linux_aarch64:
runs-on: ubuntu-18.04
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Prepare cross-platform environment
run: |
sudo mkdir /cross-build
sudo touch /etc/apt/sources.list.d/arm64.list
echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/arm64.list
sudo apt-get update
sudo apt-get install -y gcc-aarch64-linux-gnu libc6-arm64-cross libc6-dev-arm64-cross
sudo apt-get download libssl1.1:arm64 libssl-dev:arm64
sudo dpkg -x libssl1.1*.deb /cross-build
sudo dpkg -x libssl-dev*.deb /cross-build
rustup target add aarch64-unknown-linux-gnu
echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV
echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/aarch64-linux-gnu" >> $GITHUB_ENV
echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV
echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
echo "RUSTFLAGS=-C linker=aarch64-linux-gnu-gcc -L/usr/aarch64-linux-gnu/lib -L/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV
- name: Build the executable
run: cargo build --release --target=aarch64-unknown-linux-gnu
- name: Attach artifact to the release
uses: Shopify/upload-to-release@1.0.0
with:
name: monolith-gnu-linux-aarch64
path: target/aarch64-unknown-linux-gnu/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }}
gnu_linux_x86_64:
runs-on: ubuntu-18.04
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Build the executable
run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0
with:
name: monolith-gnu-linux-x86_64
path: target/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }}

View File

@ -1,49 +0,0 @@
# CI GitHub Actions workflow for monolith
name: CI
on:
pull_request:
branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'docs/'
- 'snap/'
- '.adr-dir'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs:
build_and_test:
strategy:
matrix:
os:
- ubuntu-latest
- macos-latest
- windows-latest
rust:
- stable
- beta
- nightly
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose
- name: Run tests
run: cargo test --all --locked --verbose
- name: Check code formatting
run: |
rustup component add rustfmt
cargo fmt --all -- --check

3
.gitignore vendored
View File

@ -4,3 +4,6 @@
# These are backup files generated by rustfmt
**/*.rs.bk
# Exclude accidental HTML files
*.html

29
.travis.yml Normal file
View File

@ -0,0 +1,29 @@
language: rust
cache: cargo
sudo: false
os:
- linux
- osx
rust:
- stable
- beta
- nightly
before_script:
- rustup component add rustfmt
script:
- cargo build --all --locked --verbose
- cargo test --all --locked --verbose
- |
if [[ "$TRAVIS_RUST_VERSION" == "stable" ]]; then
cargo fmt --all -- --check
fi
jobs:
allow_failures:
- rust: nightly
fast_finish: true

1872
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
[package]
name = "monolith"
version = "2.6.2"
version = "2.1.2"
edition = "2018"
authors = [
"Sunshine <sunshine@uberspace.net>",
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
@ -8,41 +9,17 @@ authors = [
"Emi Simpson <emi@alchemi.dev>",
"rhysd <lin90162@yahoo.co.jp>",
]
edition = "2018"
description = "CLI tool for saving web pages as a single HTML file"
homepage = "https://github.com/Y2Z/monolith"
repository = "https://github.com/Y2Z/monolith"
readme = "README.md"
keywords = ["web", "http", "html", "download", "command-line"]
categories = ["command-line-utilities", "web-programming"]
include = [
"src/*.rs",
"Cargo.toml",
]
license = "CC0-1.0"
[dependencies]
atty = "0.2.14" # Used for highlighting network errors
base64 = "0.13.0" # Used for integrity attributes
chrono = "0.4.20" # Used for formatting creation timestamp
clap = "3.2.16"
cssparser = "0.29.6"
encoding_rs = "0.8.31"
base64 = "0.10.1"
clap = "2.33.0"
html5ever = "0.24.1"
percent-encoding = "2.1.0"
sha2 = "0.10.2" # Used for calculating checksums during integrity checks
url = "2.2.2"
# Used for parsing srcset and NOSCRIPT
[dependencies.regex]
version = "1.6.0"
default-features = false
features = ["std", "perf-dfa", "unicode-perl"]
lazy_static = "1.4.0"
regex = "1.3.1"
url = "2.1.0"
[dependencies.reqwest]
version = "0.11.11"
version = "0.10.*"
default-features = false
features = ["default-tls", "blocking", "gzip", "brotli", "deflate"]
[dev-dependencies]
assert_cmd = "2.0.4"
features = ["default-tls", "blocking", "gzip"]

View File

@ -1,22 +0,0 @@
FROM ekidd/rust-musl-builder as builder
RUN curl -L -o monolith.tar.gz $(curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
| grep "tarball_url.*\"," \
| cut -d '"' -f 4)
RUN tar xfz monolith.tar.gz \
&& mv Y2Z-monolith-* monolith \
&& rm monolith.tar.gz
WORKDIR monolith/
RUN make install
FROM alpine
RUN apk update && \
apk add --no-cache openssl && \
rm -rf "/var/cache/apk/*"
COPY --from=builder /home/rust/.cargo/bin/monolith /usr/bin/monolith
WORKDIR /tmp
ENTRYPOINT ["/usr/bin/monolith"]

137
LICENSE
View File

@ -1,121 +1,24 @@
Creative Commons Legal Code
This is free and unencumbered software released into the public domain.
CC0 1.0 Universal
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
HEREUNDER.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
Statement of Purpose
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator
and subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for
the purpose of contributing to a commons of creative, cultural and
scientific works ("Commons") that the public can reliably and without fear
of later claims of infringement build upon, modify, incorporate in other
works, reuse and redistribute as freely as possible in any form whatsoever
and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free
culture and the further production of creative, cultural and scientific
works, or to gain reputation or greater distribution for their Work in
part through the use and efforts of others.
For these and/or other purposes and motivations, and without any
expectation of additional consideration or compensation, the person
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
is an owner of Copyright and Related Rights in the Work, voluntarily
elects to apply CC0 to the Work and publicly distribute the Work under its
terms, with knowledge of his or her Copyright and Related Rights in the
Work and the meaning and intended legal effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not
limited to, the following:
i. the right to reproduce, adapt, distribute, perform, display,
communicate, and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or
likeness depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data
in a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation
thereof, including any amended or successor version of such
directive); and
vii. other similar, equivalent or corresponding rights throughout the
world based on applicable law or treaty, and any national
implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention
of, applicable law, Affirmer hereby overtly, fully, permanently,
irrevocably and unconditionally waives, abandons, and surrenders all of
Affirmer's Copyright and Related Rights and associated claims and causes
of action, whether now known or unknown (including existing as well as
future claims and causes of action), in the Work (i) in all territories
worldwide, (ii) for the maximum duration provided by applicable law or
treaty (including future time extensions), (iii) in any current or future
medium and for any number of copies, and (iv) for any purpose whatsoever,
including without limitation commercial, advertising or promotional
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
member of the public at large and to the detriment of Affirmer's heirs and
successors, fully intending that such Waiver shall not be subject to
revocation, rescission, cancellation, termination, or any other legal or
equitable action to disrupt the quiet enjoyment of the Work by the public
as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason
be judged legally invalid or ineffective under applicable law, then the
Waiver shall be preserved to the maximum extent permitted taking into
account Affirmer's express Statement of Purpose. In addition, to the
extent the Waiver is so judged Affirmer hereby grants to each affected
person a royalty-free, non transferable, non sublicensable, non exclusive,
irrevocable and unconditional license to exercise Affirmer's Copyright and
Related Rights in the Work (i) in all territories worldwide, (ii) for the
maximum duration provided by applicable law or treaty (including future
time extensions), (iii) in any current or future medium and for any number
of copies, and (iv) for any purpose whatsoever, including without
limitation commercial, advertising or promotional purposes (the
"License"). The License shall be deemed effective as of the date CC0 was
applied by Affirmer to the Work. Should any part of the License for any
reason be judged legally invalid or ineffective under applicable law, such
partial invalidity or ineffectiveness shall not invalidate the remainder
of the License, and in such case Affirmer hereby affirms that he or she
will not (i) exercise any of his or her remaining Copyright and Related
Rights in the Work or (ii) assert any associated claims and causes of
action with respect to the Work, in either case contrary to Affirmer's
express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the present or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the
Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.
For more information, please refer to <http://unlicense.org>

View File

@ -1,29 +1,16 @@
# Makefile for monolith
.PHONY: all build install run test lint
all: build
.PHONY: all
all: test build
build:
@cargo build --locked
.PHONY: build
test: build
@cargo test --locked
@cargo fmt --all -- --check
.PHONY: test
lint:
@cargo fmt --all --
.PHONY: lint
install:
@cargo install --force --locked --path .
.PHONY: install
uninstall:
@cargo uninstall
.PHONY: uninstall
test:
@cargo test --locked
@cargo fmt --all -- --check
clean:
@cargo clean
.PHONY: clean
lint:
@cargo fmt --all --

180
README.md
View File

@ -1,175 +1,63 @@
[![monolith build status on GNU/Linux](https://github.com/Y2Z/monolith/workflows/GNU%2FLinux/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux)
[![monolith build status on macOS](https://github.com/Y2Z/monolith/workflows/macOS/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS)
[![monolith build status on Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
[![Travis CI Build Status](https://travis-ci.org/Y2Z/monolith.svg?branch=master)](https://travis-ci.org/Y2Z/monolith)
[![AppVeyor Build status](https://ci.appveyor.com/api/projects/status/ae7soyjih8jg2bv7/branch/master?svg=true)](https://ci.appveyor.com/project/snshn/monolith/branch/master)
```
_____ ______________ __________ ___________________ ___
| \ / \ | | | | | |
| \_/ __ \_| __ | | ___ ___ |__| |
| | | | | | | | | | | |
| |\ /| |__| _ |__| |____| | | | | __ |
| | \___/ | | \ | | | | | | |
|___| |__________| \_____________________| |___| |___| |___|
___ ___________ __________ ___________________ ___
| \ / \ | | | | | |
| \_/ __ \_| __ | | ___ ___ |__| |
| | | | | | | | | | | |
| |__| _ |__| |____| | | | | __ |
| |\_/| | \ | | | | | | |
|___| |__________| \____________________| |___| |___| |___|
```
A data hoarders dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
A data hoarder's dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
Unlike the conventional “Save page as”, `monolith` not only saves the target document, it embeds CSS, image, and JavaScript assets **all at once**, producing a single HTML5 document that is a joy to store and share.
Unlike the conventional "Save page as", `monolith` not only saves the target document, it embeds CSS, image, and JavaScript assets **all at once**, producing a single HTML5 document that is a joy to store and share.
If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
---------------------------------------------------
## Installation
#### Using [Cargo](https://crates.io/crates/monolith)
### From source
$ git clone https://github.com/Y2Z/monolith.git
$ cd monolith
$ cargo install --path .
```console
cargo install monolith
```
#### Via [Homebrew](https://formulae.brew.sh/formula/monolith) (macOS and GNU/Linux)
```console
brew install monolith
```
#### Via [MacPorts](https://ports.macports.org/port/monolith/summary) (macOS)
```console
sudo port install monolith
```
#### Using [Snapcraft](https://snapcraft.io/monolith) (GNU/Linux)
```console
snap install monolith
```
#### Using [FreeBSD packages](https://svnweb.freebsd.org/ports/head/www/monolith/) (FreeBSD)
```console
pkg install monolith
```
#### Using [FreeBSD ports](https://www.freshports.org/www/monolith/) (FreeBSD)
```console
cd /usr/ports/www/monolith/
make install clean
```
#### Using [pkgsrc](https://pkgsrc.se/www/monolith) (NetBSD, OpenBSD, Haiku, etc)
```console
cd /usr/pkgsrc/www/monolith
make install clean
```
#### Using [containers](https://www.docker.com/)
```console
docker build -t Y2Z/monolith .
sudo install -b dist/run-in-container.sh /usr/local/bin/monolith
```
#### From [source](https://github.com/Y2Z/monolith)
Dependency: `libssl`
```console
git clone https://github.com/Y2Z/monolith.git
cd monolith
make install
```
#### Using [pre-built binaries](https://github.com/Y2Z/monolith/releases) (Windows, ARM-based devices, etc)
Every release contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standard CPU architecture.
---------------------------------------------------
### With Homebrew (on macOS and GNU/Linux)
$ brew install monolith
### Using Snapcraft (on GNU/Linux)
$ snap install monolith
## Usage
```console
monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
```
```console
cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
```
---------------------------------------------------
$ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
## Options
- `-a`: Exclude audio sources
- `-b`: Use custom `base URL`
- `-c`: Exclude CSS
- `-C`: Save document using custom `charset`
- `-d`: Allow retrieving assets only from specified `domain(s)`
- `-e`: Ignore network errors
- `-E`: Avoid retrieving assets located within specified domains
- `-f`: Omit frames
- `-F`: Exclude web fonts
- `-c`: Ignore styles
- `-f`: Exclude iframes
- `-i`: Remove images
- `-I`: Isolate the document
- `-I`: Isolate document
- `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates
- `-M`: Don't add timestamp and URL information
- `-n`: Extract contents of NOSCRIPT elements
- `-o`: Write output to `file` (use “-” for STDOUT)
- `-s`: Be quiet
- `-t`: Adjust `network request timeout`
- `-u`: Provide custom `User-Agent`
- `-v`: Exclude videos
---------------------------------------------------
## Proxies
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
---------------------------------------------------
- `-o`: Write output to file
- `-s`: Silent mode
- `-u`: Specify custom User-Agent
## HTTPS and HTTP proxies
Please set `https_proxy`, `http_proxy` and `no_proxy` environment variables.
## Contributing
Please open an issue if something is wrong, that helps make this project better.
---------------------------------------------------
## Related projects
- Monolith Chrome Extension: https://github.com/rhysd/monolith-of-web
- Pagesaver: https://github.com/distributed-mind/pagesaver
- Personal WayBack Machine: https://github.com/popey/pwbm
- Hako: https://github.com/dmpop/hako
- Monk: https://github.com/monk-dev/monk
---------------------------------------------------
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
- `Personal WayBack Machine`: https://github.com/popey/pwbm
- `SingleFile`: https://github.com/gildas-lormeau/SingleFile
## License
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
This software is distributed without any warranty.
---------------------------------------------------
The Unlicense
<!-- Microtext -->
<sub>Keep in mind that `monolith` is not aware of your browsers session</sub>
<sub>Keep in mind that `monolith` is not aware of your browser's session</sub>

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.2 MiB

View File

@ -1,10 +0,0 @@
#!/bin/sh
DOCKER=docker
PROG_NAME=monolith
if which podman 2>&1 > /dev/null; then
DOCKER=podman
fi
$DOCKER run --rm Y2Z/$PROG_NAME "$@"

View File

@ -1,19 +0,0 @@
# 1. Record architecture decisions
Date: 2019-12-25
## Status
Accepted
## Context
We need to record the architectural decisions made on this project.
## Decision
We will use Architecture Decision Records, as [described by Michael Nygard](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions).
## Consequences
See Michael Nygard's article, linked above. For a lightweight ADR toolset, see Nat Pryce's [adr-tools](https://github.com/npryce/adr-tools).

View File

@ -1,19 +0,0 @@
# 2. NOSCRIPT nodes
Date: 2020-04-16
## Status
Accepted
## Context
HTML pages can contain `noscript` nodes, which reveal their contents only in case when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational, however sometimes can also feature media assets or even iframes.
## Decision
When the document is being saved with or without JavaScript, each `noscript` node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all `noscript` nodes in order to make their contents always visible in the document, complimenting the "disable JS" function of the program.
## Consequences
Saved documents will have contents of all `noscript` nodes processed as if they are part of the document's DOM, therefore properly display images encapsulated within `noscript` nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" `noscript` elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.

View File

@ -1,21 +0,0 @@
# 3. Network request timeout
Date: 2020-02-15
## Status
Accepted
## Context
A slow network connection and overloaded server may negatively impact network response time.
## Decision
Make the program simulate behavior of popular web browsers and CLI tools, where the default network response timeout is most often set to 120 seconds.
Instead of featuring retries for timed out network requests, the program should have an option to adjust the timeout length, along with making it indefinite when given "0" as its value.
## Consequences
The user is able to retrieve resources that have long response time, as well as obtain full control over how soon, and if at all, network requests should time out.

View File

@ -1,21 +0,0 @@
# 4. Asset integrity check
Date: 2020-02-23
## Status
Accepted
## Context
In HTML5, `link` and `script` nodes have an attribute named `integrity`, which lets the browser check if the remote file is valid, mostly for the purpose of enhancing page security.
## Decision
In order to replicate the browser's behavior, the program should perform integrity check the same way it does, excluding the linked asset from the final result if such check fails.
The `integrity` attribute should be removed from nodes, as it bears no benefit for resources embedded as data URLs.
## Consequences
Assets that fail to pass the check get excluded from the saved document. Meanwhile, saved documents no longer contain integrity attributes on all `link` and `script` nodes.

View File

@ -1,19 +0,0 @@
# 5. Asset Minimization
Date: 2020-03-14
## Status
Accepted
## Context
It may look like a good idea to make monolith compress retrieved assets while saving the page for the purpose of reducing the resulting document's file size.
## Decision
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool, aside from being able to tell monolith to exclude certain types of asests (e.g. images, CSS, JavaScript), it would be outside of scope of this program to implement code for compressing assets. Minimizing files before embedding them does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages saved by monolith, if needed.
## Consequences
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.

View File

@ -1,19 +0,0 @@
# 6. Reload and location `meta` tags
Date: 2020-06-25
## Status
Accepted
## Context
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
## Decision
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` tags that have `http-equiv` attribute equal to "Refresh" or "Location", in order to prevent them from reloading the page or redirecting to another location.
## Consequences
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.

View File

@ -1,19 +0,0 @@
# 7. Network errors
Date: 2020-11-22
## Status
Accepted
## Context
Servers may return information with HTTP response codes other than `200`, however those responses may still contain useful data.
## Decision
Fail by default, notifying of the network error. Add option to continue retrieving assets by treating all response codes as `200`.
## Consequences
Monolith will fail to obtain resources with status other than `200`, unless told to ignore network errors.

View File

@ -1,40 +0,0 @@
# 8. Base Tag
Date: 2020-12-25
## Status
Accepted
## Context
HTML documents may contain `base` tag, which influences resolution of anchor links and relative URLs as well as dynamically loaded resources.
Sometimes, in order to make certain saved documents function closer to how they operate while being served from a remote server, the `base` tag specifying the source page's URL may need to be added to the document.
There can be only one such tag. If multiple `base` tags are present, only the first encountered tag ends up being used.
## Decision
Adding the `base` tag should be optional — saved documents should not contain the `base` tag unless it was specified by the user, or the document originally had the `base` tag in it.
Existing `href` attribute's value of the original `base` tag should be used for resolving the document's relative links instead of document's own URL (precisely the way browsers do it).
## Consequences
#### If the base tag does not exist in the source document
- If the base tag does not exist in the source document
- With base URL option provided
- use the specified base URL value to retrieve assets, keep original base URL value in the document
- Without base URL option provided
- download document as usual, do not add base tag
- If the base tag already exists in the source document
- With base URL option provided
- we overwrite the original base URL before retrieving assets, keep new base URL value in the document
- Without base URL option provided:
- use the base URL from the original document to retrieve assets, keep original base URL value in the document
The program will obtain ability to retrieve remote assets for non-remote sources (such as data URLs and local files).
The program will obatin ability to get rid of existing base tag values (by provind an empty one).

View File

@ -1,3 +0,0 @@
# References
- https://content-security-policy.com/

View File

@ -1,23 +0,0 @@
# Web apps that can be saved with Monolith
These apps retain all or most of their functionality when saved with Monolith:
## Converse
| Website | https://conversejs.org |
|:-----------------------|:--------------------------------------------------------------------|
| Description | An XMPP client built using web technologies |
| Functionality retained | **full** |
| Command to use | `monolith https://conversejs.org/fullscreen.html > conversejs.html` |
| Monolith version used | 2.2.7 |
## Markdown Tables generator
| Website | https://www.tablesgenerator.com |
|:--------------------------|:-----------------------------------------------------------------------------------------------|
| Description | Tool for creating tables in extended Markdown format |
| Functionality retained | **full** |
| Command to use | `monolith -I https://www.tablesgenerator.com/markdown_tables -o markdown-table-generator.html` |
| Monolith version used | 2.6.1 |

View File

@ -1,25 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://schemas.microsoft.com/packaging/2015/06/nuspec.xsd">
<metadata>
<id>monolith</id>
<version>2.4.0</version>
<title>Monolith</title>
<authors>Sunshine, Mahdi Robatipoor, Emmanuel Delaborde, Emi Simpson, rhysd</authors>
<projectUrl>https://github.com/Y2Z/monolith</projectUrl>
<iconUrl>https://raw.githubusercontent.com/Y2Z/monolith/master/assets/icon/icon.png</iconUrl>
<licenseUrl>https://raw.githubusercontent.com/Y2Z/monolith/master/LICENSE</licenseUrl>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>CLI tool for saving complete web pages as a single HTML file
A data hoarders dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
Unlike the conventional “Save page as”, monolith not only saves the target document, it embeds CSS, image, and JavaScript assets all at once, producing a single HTML5 document that is a joy to store and share.
If compared to saving websites using wget, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
</description>
<copyright>Public Domain</copyright>
<language>en-US</language>
<tags>scraping archiving</tags>
<docsUrl>https://github.com/Y2Z/monolith/blob/master/README.md</docsUrl>
</metadata>
</package>

View File

@ -18,11 +18,11 @@ description: |
confinement: strict
# Building on armhf fails, so we specify all supported non-armhf architectures
architectures:
- build-on: amd64
- build-on: arm64
- build-on: armhf
- build-on: i386
- build-on: arm64
- build-on: ppc64el
- build-on: s390x

65
src/args.rs Normal file
View File

@ -0,0 +1,65 @@
use clap::{App, Arg};
#[derive(Default)]
pub struct AppArgs {
pub url_target: String,
pub no_css: bool,
pub no_frames: bool,
pub no_images: bool,
pub no_js: bool,
pub insecure: bool,
pub isolate: bool,
pub output: String,
pub silent: bool,
pub user_agent: String,
}
const DEFAULT_USER_AGENT: &str =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0";
impl AppArgs {
pub fn get() -> AppArgs {
let app = App::new("monolith")
.version(crate_version!())
.author(crate_authors!("\n"))
.about(crate_description!())
.arg(
Arg::with_name("url")
.required(true)
.takes_value(true)
.index(1)
.help("URL to download"),
)
// .args_from_usage("-a, --include-audio 'Embed audio sources'")
.args_from_usage("-c, --no-css 'Ignore styles'")
.args_from_usage("-f, --no-frames 'Exclude iframes'")
.args_from_usage("-i, --no-images 'Remove images'")
.args_from_usage("-I, --isolate 'Cut off from the Internet'")
.args_from_usage("-j, --no-js 'Exclude JavaScript'")
.args_from_usage("-k, --insecure 'Accept invalid X.509 (TLS) certificates'")
.args_from_usage("-o, --output=[document.html] 'Write output to <file>'")
.args_from_usage("-s, --silent 'Suppress verbosity'")
.args_from_usage("-u, --user-agent=[Iceweasel] 'Custom User-Agent string'")
// .args_from_usage("-v, --include-video 'Embed video sources'")
.get_matches();
let mut app_args = AppArgs::default();
// Process the command
app_args.url_target = app
.value_of("url")
.expect("please set target url")
.to_string();
app_args.no_css = app.is_present("no-css");
app_args.no_frames = app.is_present("no-frames");
app_args.no_images = app.is_present("no-images");
app_args.no_js = app.is_present("no-js");
app_args.insecure = app.is_present("insecure");
app_args.isolate = app.is_present("isolate");
app_args.silent = app.is_present("silent");
app_args.output = app.value_of("output").unwrap_or("").to_string();
app_args.user_agent = app
.value_of("user-agent")
.unwrap_or(DEFAULT_USER_AGENT)
.to_string();
app_args
}
}

View File

@ -1,447 +0,0 @@
use cssparser::{
serialize_identifier, serialize_string, ParseError, Parser, ParserInput, SourcePosition, Token,
};
use reqwest::blocking::Client;
use std::collections::HashMap;
use url::Url;
use crate::opts::Options;
use crate::url::{create_data_url, resolve_url, EMPTY_IMAGE_DATA_URL};
use crate::utils::retrieve_asset;
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
// Universal
"background",
"background-image",
"border-image",
"border-image-source",
"content",
"cursor",
"list-style",
"list-style-image",
"mask",
"mask-image",
// Specific to @counter-style
"additive-symbols",
"negative",
"pad",
"prefix",
"suffix",
"symbols",
];
pub fn embed_css(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
document_url: &Url,
css: &str,
options: &Options,
depth: u32,
) -> String {
let mut input = ParserInput::new(&css);
let mut parser = Parser::new(&mut input);
process_css(
cache,
client,
document_url,
&mut parser,
options,
depth,
"",
"",
"",
)
.unwrap()
}
pub fn format_ident(ident: &str) -> String {
let mut res: String = "".to_string();
let _ = serialize_identifier(ident, &mut res);
res = res.trim_end().to_string();
res
}
pub fn format_quoted_string(string: &str) -> String {
let mut res: String = "".to_string();
let _ = serialize_string(string, &mut res);
res
}
pub fn is_image_url_prop(prop_name: &str) -> bool {
CSS_PROPS_WITH_IMAGE_URLS
.iter()
.find(|p| prop_name.eq_ignore_ascii_case(p))
.is_some()
}
pub fn process_css<'a>(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
document_url: &Url,
parser: &mut Parser,
options: &Options,
depth: u32,
rule_name: &str,
prop_name: &str,
func_name: &str,
) -> Result<String, ParseError<'a, String>> {
let mut result: String = "".to_string();
let mut curr_rule: String = rule_name.clone().to_string();
let mut curr_prop: String = prop_name.clone().to_string();
let mut token: &Token;
let mut token_offset: SourcePosition;
loop {
token_offset = parser.position();
token = match parser.next_including_whitespace_and_comments() {
Ok(token) => token,
Err(_) => {
break;
}
};
match *token {
Token::Comment(_) => {
let token_slice = parser.slice_from(token_offset);
result.push_str(token_slice);
}
Token::Semicolon => result.push_str(";"),
Token::Colon => result.push_str(":"),
Token::Comma => result.push_str(","),
Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
if options.no_fonts && curr_rule == "font-face" {
continue;
}
let closure: &str;
if token == &Token::ParenthesisBlock {
result.push_str("(");
closure = ")";
} else if token == &Token::SquareBracketBlock {
result.push_str("[");
closure = "]";
} else {
result.push_str("{");
closure = "}";
}
let block_css: String = parser
.parse_nested_block(|parser| {
process_css(
cache,
client,
document_url,
parser,
options,
depth,
rule_name,
curr_prop.as_str(),
func_name,
)
})
.unwrap();
result.push_str(block_css.as_str());
result.push_str(closure);
}
Token::CloseParenthesis => result.push_str(")"),
Token::CloseSquareBracket => result.push_str("]"),
Token::CloseCurlyBracket => result.push_str("}"),
Token::IncludeMatch => result.push_str("~="),
Token::DashMatch => result.push_str("|="),
Token::PrefixMatch => result.push_str("^="),
Token::SuffixMatch => result.push_str("$="),
Token::SubstringMatch => result.push_str("*="),
Token::CDO => result.push_str("<!--"),
Token::CDC => result.push_str("-->"),
Token::WhiteSpace(ref value) => {
result.push_str(value);
}
// div...
Token::Ident(ref value) => {
curr_rule = "".to_string();
curr_prop = value.to_string();
result.push_str(&format_ident(value));
}
// @import, @font-face, @charset, @media...
Token::AtKeyword(ref value) => {
curr_rule = value.to_string();
if options.no_fonts && curr_rule == "font-face" {
continue;
}
result.push_str("@");
result.push_str(value);
}
Token::Hash(ref value) => {
result.push_str("#");
result.push_str(value);
}
Token::QuotedString(ref value) => {
if curr_rule == "import" {
// Reset current at-rule value
curr_rule = "".to_string();
// Skip empty import values
if value.len() == 0 {
result.push_str("''");
continue;
}
let import_full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&document_url,
&import_full_url,
options,
depth + 1,
) {
Ok((
import_contents,
import_final_url,
import_media_type,
import_charset,
)) => {
let mut import_data_url = create_data_url(
&import_media_type,
&import_charset,
embed_css(
cache,
client,
&import_final_url,
&String::from_utf8_lossy(&import_contents),
options,
depth + 1,
)
.as_bytes(),
&import_final_url,
);
import_data_url.set_fragment(import_full_url.fragment());
result.push_str(
format_quoted_string(&import_data_url.to_string()).as_str(),
);
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if import_full_url.scheme() == "http"
|| import_full_url.scheme() == "https"
{
result.push_str(
format_quoted_string(&import_full_url.to_string()).as_str(),
);
}
}
}
} else {
if func_name == "url" {
// Skip empty url()'s
if value.len() == 0 {
continue;
}
if options.no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
} else {
let resolved_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&document_url,
&resolved_url,
options,
depth + 1,
) {
Ok((data, final_url, media_type, charset)) => {
let mut data_url =
create_data_url(&media_type, &charset, &data, &final_url);
data_url.set_fragment(resolved_url.fragment());
result.push_str(
format_quoted_string(&data_url.to_string()).as_str(),
);
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if resolved_url.scheme() == "http"
|| resolved_url.scheme() == "https"
{
result.push_str(
format_quoted_string(&resolved_url.to_string())
.as_str(),
);
}
}
}
}
} else {
result.push_str(format_quoted_string(value).as_str());
}
}
}
Token::Number {
ref has_sign,
ref value,
..
} => {
if *has_sign && *value >= 0. {
result.push_str("+");
}
result.push_str(&value.to_string())
}
Token::Percentage {
ref has_sign,
ref unit_value,
..
} => {
if *has_sign && *unit_value >= 0. {
result.push_str("+");
}
result.push_str(&(unit_value * 100.0).to_string());
result.push_str("%");
}
Token::Dimension {
ref has_sign,
ref value,
ref unit,
..
} => {
if *has_sign && *value >= 0. {
result.push_str("+");
}
result.push_str(&value.to_string());
result.push_str(&unit.to_string());
}
// #selector, #id...
Token::IDHash(ref value) => {
curr_rule = "".to_string();
result.push_str("#");
result.push_str(&format_ident(value));
}
// url()
Token::UnquotedUrl(ref value) => {
let is_import: bool = curr_rule == "import";
if is_import {
// Reset current at-rule value
curr_rule = "".to_string();
}
// Skip empty url()'s
if value.len() < 1 {
result.push_str("url()");
continue;
} else if value.starts_with("#") {
result.push_str("url(");
result.push_str(value);
result.push_str(")");
continue;
}
result.push_str("url(");
if is_import {
let full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&document_url,
&full_url,
options,
depth + 1,
) {
Ok((css, final_url, media_type, charset)) => {
let mut data_url = create_data_url(
&media_type,
&charset,
embed_css(
cache,
client,
&final_url,
&String::from_utf8_lossy(&css),
options,
depth + 1,
)
.as_bytes(),
&final_url,
);
data_url.set_fragment(full_url.fragment());
result.push_str(format_quoted_string(&data_url.to_string()).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if full_url.scheme() == "http" || full_url.scheme() == "https" {
result
.push_str(format_quoted_string(&full_url.to_string()).as_str());
}
}
}
} else {
if is_image_url_prop(curr_prop.as_str()) && options.no_images {
result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
} else {
let full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&document_url,
&full_url,
options,
depth + 1,
) {
Ok((data, final_url, media_type, charset)) => {
let mut data_url =
create_data_url(&media_type, &charset, &data, &final_url);
data_url.set_fragment(full_url.fragment());
result
.push_str(format_quoted_string(&data_url.to_string()).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if full_url.scheme() == "http" || full_url.scheme() == "https" {
result.push_str(
format_quoted_string(&full_url.to_string()).as_str(),
);
}
}
}
}
}
result.push_str(")");
}
// =
Token::Delim(ref value) => result.push_str(&value.to_string()),
Token::Function(ref name) => {
let function_name: &str = &name.clone();
result.push_str(function_name);
result.push_str("(");
let block_css: String = parser
.parse_nested_block(|parser| {
process_css(
cache,
client,
document_url,
parser,
options,
depth,
curr_rule.as_str(),
curr_prop.as_str(),
function_name,
)
})
.unwrap();
result.push_str(block_css.as_str());
result.push_str(")");
}
Token::BadUrl(_) | Token::BadString(_) => {}
}
}
// Ensure empty CSS is really empty
if result.len() > 0 && result.trim().len() == 0 {
result = result.trim().to_string()
}
Ok(result)
}

File diff suppressed because it is too large Load Diff

68
src/http.rs Normal file
View File

@ -0,0 +1,68 @@
use crate::utils::{clean_url, data_to_dataurl, is_data_url};
use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE;
use std::collections::HashMap;
pub fn retrieve_asset(
cache: &mut HashMap<String, String>,
client: &Client,
url: &str,
as_dataurl: bool,
mime: &str,
opt_silent: bool,
) -> Result<(String, String), reqwest::Error> {
let cache_key = clean_url(&url);
if is_data_url(&url).unwrap() {
Ok((url.to_string(), url.to_string()))
} else {
if cache.contains_key(&cache_key) {
// url is in cache
if !opt_silent {
eprintln!("{} (from cache)", &url);
}
let data = cache.get(&cache_key).unwrap();
Ok((data.to_string(), url.to_string()))
} else {
// url not in cache, we request it
let mut response = client.get(url).send()?;
let res_url = response.url().to_string();
if !opt_silent {
if url == res_url {
eprintln!("{}", &url);
} else {
eprintln!("{} -> {}", &url, &res_url);
}
}
let new_cache_key = clean_url(&res_url);
if as_dataurl {
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
// Attempt to obtain MIME type by reading the Content-Type header
let mimetype = if mime == "" {
response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
.unwrap_or(&mime)
} else {
mime
};
let dataurl = data_to_dataurl(&mimetype, &data);
// insert in cache
cache.insert(new_cache_key, dataurl.clone());
Ok((dataurl, res_url))
} else {
let content = response.text().unwrap();
// insert in cache
cache.insert(new_cache_key, content.clone());
Ok((content, res_url))
}
}
}
}

View File

@ -1,7 +1,7 @@
const JS_DOM_EVENT_ATTRS: &'static [&str] = &[
// From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects":
const JS_DOM_EVENT_ATTRS: &[&str] = &[
// From WHATWG HTML spec 8.1.5.2 'Event handlers on elements, Document objects, and Window objects':
// https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
// https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes")
// https://html.spec.whatwg.org/#attributes-3 (table 'List of event handler content attributes')
// Global event handlers
"onabort",

View File

@ -1,6 +1,13 @@
pub mod css;
#[macro_use]
extern crate lazy_static;
#[macro_use]
mod macros;
pub mod html;
pub mod http;
pub mod js;
pub mod opts;
pub mod url;
pub mod utils;
#[cfg(test)]
pub mod tests;

9
src/macros.rs Normal file
View File

@ -0,0 +1,9 @@
#[macro_export]
macro_rules! str {
() => {
String::new()
};
($val: expr) => {
ToString::to_string(&$val)
};
}

View File

@ -1,329 +1,111 @@
use encoding_rs::Encoding;
use html5ever::rcdom::RcDom;
#[macro_use]
extern crate clap;
mod args;
mod macros;
use crate::args::AppArgs;
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::http::retrieve_asset;
use monolith::utils::is_valid_url;
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
use std::fs;
use std::io::{self, prelude::*, Error, Write};
use std::path::Path;
use std::fs::File;
use std::io::{self, Error, Write};
use std::process;
use std::time::Duration;
use url::Url;
use monolith::html::{
add_favicon, create_metadata_tag, get_base_url, get_charset, has_favicon, html_to_dom,
serialize_document, set_base_url, set_charset, walk_and_embed_assets,
};
use monolith::opts::Options;
use monolith::url::{create_data_url, resolve_url};
use monolith::utils::retrieve_asset;
enum Output {
Stdout(io::Stdout),
File(fs::File),
File(File),
}
impl Output {
fn new(file_path: &str) -> Result<Output, Error> {
if file_path.is_empty() || file_path.eq("-") {
if file_path.is_empty() {
Ok(Output::Stdout(io::stdout()))
} else {
Ok(Output::File(fs::File::create(file_path)?))
Ok(Output::File(File::create(file_path)?))
}
}
fn write(&mut self, bytes: &Vec<u8>) -> Result<(), Error> {
fn writeln_str(&mut self, s: &str) -> Result<(), Error> {
match self {
Output::Stdout(stdout) => {
stdout.write_all(bytes)?;
// Ensure newline at end of output
if bytes.last() != Some(&b"\n"[0]) {
stdout.write(b"\n")?;
}
writeln!(stdout, "{}", s)?;
stdout.flush()
}
Output::File(file) => {
file.write_all(bytes)?;
// Ensure newline at end of output
if bytes.last() != Some(&b"\n"[0]) {
file.write(b"\n")?;
}
file.flush()
Output::File(f) => {
writeln!(f, "{}", s)?;
f.flush()
}
}
}
}
pub fn read_stdin() -> Vec<u8> {
let mut buffer: Vec<u8> = vec![];
match io::stdin().lock().read_to_end(&mut buffer) {
Ok(_) => buffer,
Err(_) => buffer,
}
}
fn main() {
let options = Options::from_args();
let app_args = AppArgs::get();
// Check if target was provided
if options.target.len() == 0 {
if !options.silent {
eprintln!("No target specified");
}
if !is_valid_url(app_args.url_target.as_str()) {
eprintln!(
"Only HTTP and HTTPS URLs are allowed but got: {}",
&app_args.url_target
);
process::exit(1);
}
// Check if custom charset is valid
if let Some(custom_charset) = options.charset.clone() {
if !Encoding::for_label_no_replacement(custom_charset.as_bytes()).is_some() {
eprintln!("Unknown encoding: {}", &custom_charset);
process::exit(1);
}
}
let mut use_stdin: bool = false;
let target_url = match options.target.as_str() {
"-" => {
// Read from pipe (stdin)
use_stdin = true;
// Set default target URL to an empty data URL; the user can set it via --base-url
Url::parse("data:text/html,").unwrap()
}
target => match Url::parse(&target) {
Ok(url) => match url.scheme() {
"data" | "file" | "http" | "https" => url,
unsupported_scheme => {
if !options.silent {
eprintln!("Unsupported target URL type: {}", unsupported_scheme);
}
process::exit(1)
}
},
Err(_) => {
// Failed to parse given base URL (perhaps it's a filesystem path?)
let path: &Path = Path::new(&target);
match path.exists() {
true => match path.is_file() {
true => {
let canonical_path = fs::canonicalize(&path).unwrap();
match Url::from_file_path(canonical_path) {
Ok(url) => url,
Err(_) => {
if !options.silent {
eprintln!(
"Could not generate file URL out of given path: {}",
&target
);
}
process::exit(1);
}
}
}
false => {
if !options.silent {
eprintln!("Local target is not a file: {}", &target);
}
process::exit(1);
}
},
false => {
// It is not a FS path, now we do what browsers do:
// prepend "http://" and hope it points to a website
Url::parse(&format!("http://{hopefully_url}", hopefully_url = &target))
.unwrap()
}
}
}
},
};
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
// Initialize client
let mut cache = HashMap::new();
let mut header_map = HeaderMap::new();
if let Some(user_agent) = &options.user_agent {
header_map.insert(
USER_AGENT,
HeaderValue::from_str(&user_agent).expect("Invalid User-Agent header specified"),
);
}
let client = if options.timeout > 0 {
Client::builder().timeout(Duration::from_secs(options.timeout))
} else {
// No timeout is default
Client::builder()
}
.danger_accept_invalid_certs(options.insecure)
.default_headers(header_map)
.build()
.expect("Failed to initialize HTTP client");
header_map.insert(
USER_AGENT,
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
);
// At first we assume that base URL is the same as target URL
let mut base_url: Url = target_url.clone();
let client = Client::builder()
.timeout(Duration::from_secs(10))
.danger_accept_invalid_certs(app_args.insecure)
.default_headers(header_map)
.build()
.expect("Failed to initialize HTTP client");
let data: Vec<u8>;
let mut document_encoding: String = "".to_string();
let mut dom: RcDom;
// Retrieve root document
let (data, final_url) = retrieve_asset(
&mut cache,
&client,
app_args.url_target.as_str(),
false,
"",
app_args.silent,
)
.expect("Could not retrieve assets in HTML");
let dom = html_to_dom(&data);
// Retrieve target document
if use_stdin {
data = read_stdin();
} else if target_url.scheme() == "file"
|| (target_url.scheme() == "http" || target_url.scheme() == "https")
|| target_url.scheme() == "data"
{
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options, 0) {
Ok((retrieved_data, final_url, media_type, charset)) => {
// Make sure the media type is text/html
if !media_type.eq_ignore_ascii_case("text/html") {
if !options.silent {
eprintln!("Unsupported document media type");
}
process::exit(1);
}
walk_and_embed_assets(
&mut cache,
&client,
&final_url,
&dom.document,
app_args.no_css,
app_args.no_js,
app_args.no_images,
app_args.silent,
app_args.no_frames,
);
if options
.base_url
.clone()
.unwrap_or("".to_string())
.is_empty()
{
base_url = final_url;
}
let html: String = stringify_document(
&dom.document,
app_args.no_css,
app_args.no_frames,
app_args.no_js,
app_args.no_images,
app_args.isolate,
);
data = retrieved_data;
document_encoding = charset;
}
Err(_) => {
if !options.silent {
eprintln!("Could not retrieve target document");
}
process::exit(1);
}
}
} else {
process::exit(1);
}
// Initial parse
dom = html_to_dom(&data, document_encoding.clone());
// TODO: investigate if charset from filesystem/data URL/HTTP headers
// has say over what's specified in HTML
// Attempt to determine document's charset
if let Some(html_charset) = get_charset(&dom.document) {
if !html_charset.is_empty() {
// Check if the charset specified inside HTML is valid
if let Some(encoding) = Encoding::for_label_no_replacement(html_charset.as_bytes()) {
document_encoding = html_charset;
dom = html_to_dom(&data, encoding.name().to_string());
}
}
}
// Use custom base URL if specified, read and use what's in the DOM otherwise
let custom_base_url: String = options.base_url.clone().unwrap_or("".to_string());
if custom_base_url.is_empty() {
// No custom base URL is specified
// Try to see if document has BASE element
if let Some(existing_base_url) = get_base_url(&dom.document) {
base_url = resolve_url(&target_url, &existing_base_url);
}
} else {
// Custom base URL provided
match Url::parse(&custom_base_url) {
Ok(parsed_url) => {
if parsed_url.scheme() == "file" {
// File base URLs can only work with
// documents saved from filesystem
if target_url.scheme() == "file" {
base_url = parsed_url;
}
} else {
base_url = parsed_url;
}
}
Err(_) => {
// Failed to parse given base URL, perhaps it's a filesystem path?
if target_url.scheme() == "file" {
// Relative paths could work for documents saved from filesystem
let path: &Path = Path::new(&custom_base_url);
if path.exists() {
match Url::from_file_path(fs::canonicalize(&path).unwrap()) {
Ok(file_url) => {
base_url = file_url;
}
Err(_) => {
if !options.silent {
eprintln!(
"Could not map given path to base URL: {}",
custom_base_url
);
}
process::exit(1);
}
}
}
}
}
}
}
// Traverse through the document and embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
// Update or add new BASE element to reroute network requests and hash-links
if let Some(new_base_url) = options.base_url.clone() {
dom = set_base_url(&dom.document, new_base_url);
}
// Request and embed /favicon.ico (unless it's already linked in the document)
if !options.no_images
&& (target_url.scheme() == "http" || target_url.scheme() == "https")
&& !has_favicon(&dom.document)
{
let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico");
match retrieve_asset(
&mut cache,
&client,
&target_url,
&favicon_ico_url,
&options,
0,
) {
Ok((data, final_url, media_type, charset)) => {
let favicon_data_url: Url =
create_data_url(&media_type, &charset, &data, &final_url);
dom = add_favicon(&dom.document, favicon_data_url.to_string());
}
Err(_) => {
// Failed to retrieve /favicon.ico
}
}
}
// Save using specified charset, if given
if let Some(custom_charset) = options.charset.clone() {
document_encoding = custom_charset;
dom = set_charset(dom, document_encoding.clone());
}
// Serialize DOM tree
let mut result: Vec<u8> = serialize_document(dom, document_encoding, &options);
// Prepend metadata comment tag
if !options.no_metadata {
let mut metadata_comment: String = create_metadata_tag(&target_url);
metadata_comment += "\n";
result.splice(0..0, metadata_comment.as_bytes().to_vec());
}
// Define output
let mut output = Output::new(&options.output).expect("Could not prepare output");
// Write result into stdout or file
output.write(&result).expect("Could not write HTML output");
output
.writeln_str(&html)
.expect("Could not write HTML output");
}

View File

@ -1,144 +0,0 @@
use clap::{App, Arg, ArgAction};
use std::env;
#[derive(Default)]
pub struct Options {
pub no_audio: bool,
pub base_url: Option<String>,
pub no_css: bool,
pub charset: Option<String>,
pub domains: Option<Vec<String>>,
pub ignore_errors: bool,
pub exclude_domains: bool,
pub no_frames: bool,
pub no_fonts: bool,
pub no_images: bool,
pub isolate: bool,
pub no_js: bool,
pub insecure: bool,
pub no_metadata: bool,
pub output: String,
pub silent: bool,
pub timeout: u64,
pub user_agent: Option<String>,
pub no_video: bool,
pub target: String,
pub no_color: bool,
pub unwrap_noscript: bool,
}
const ASCII: &'static str = " \
_____ ______________ __________ ___________________ ___
| \\ / \\ | | | | | |
| \\_/ __ \\_| __ | | ___ ___ |__| |
| | | | | | | | | | | |
| |\\ /| |__| _ |__| |____| | | | | __ |
| | \\___/ | | \\ | | | | | | |
|___| |__________| \\_____________________| |___| |___| |___|
";
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
const DEFAULT_USER_AGENT: &'static str =
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
const ENV_VAR_NO_COLOR: &str = "NO_COLOR";
const ENV_VAR_TERM: &str = "TERM";
impl Options {
pub fn from_args() -> Options {
let app = App::new(env!("CARGO_PKG_NAME"))
.version(env!("CARGO_PKG_VERSION"))
.author(format!("\n{}\n\n", env!("CARGO_PKG_AUTHORS").replace(':', "\n")).as_str())
.about(format!("{}\n{}", ASCII, env!("CARGO_PKG_DESCRIPTION")).as_str())
.args_from_usage("-a, --no-audio 'Removes audio sources'")
.args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
.args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-C, --charset=[UTF-8] 'Enforces custom encoding'")
.arg(
Arg::with_name("domains")
.short('d')
.long("domains")
.takes_value(true)
.value_name("DOMAINS")
.action(ArgAction::Append)
.help("Whitelist of domains"),
)
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
.args_from_usage("-E, --exclude-domains 'Treat specified domains as blacklist'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-F, --no-fonts 'Removes fonts'")
.args_from_usage("-i, --no-images 'Removes images'")
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
.args_from_usage(
"-n, --unwrap-noscript 'Replaces NOSCRIPT elements with their contents'",
)
.args_from_usage(
"-o, --output=[document.html] 'Writes output to <file>, use - for STDOUT'",
)
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
.args_from_usage("-v, --no-video 'Removes video sources'")
.arg(
Arg::with_name("target")
.required(true)
.takes_value(true)
.index(1)
.help("URL or file path, use - for STDIN"),
)
.get_matches();
let mut options: Options = Options::default();
// Process the command
options.target = app
.value_of("target")
.expect("please set target")
.to_string();
options.no_audio = app.is_present("no-audio");
if let Some(base_url) = app.value_of("base-url") {
options.base_url = Some(base_url.to_string());
}
options.no_css = app.is_present("no-css");
if let Some(charset) = app.value_of("charset") {
options.charset = Some(charset.to_string());
}
if let Some(domains) = app.get_many::<String>("domains") {
let list_of_domains: Vec<String> = domains.map(|v| v.clone()).collect::<Vec<_>>();
options.domains = Some(list_of_domains);
}
options.ignore_errors = app.is_present("ignore-errors");
options.exclude_domains = app.is_present("exclude-domains");
options.no_frames = app.is_present("no-frames");
options.no_fonts = app.is_present("no-fonts");
options.no_images = app.is_present("no-images");
options.isolate = app.is_present("isolate");
options.no_js = app.is_present("no-js");
options.insecure = app.is_present("insecure");
options.no_metadata = app.is_present("no-metadata");
options.output = app.value_of("output").unwrap_or("").to_string();
options.silent = app.is_present("silent");
options.timeout = app
.value_of("timeout")
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
.parse::<u64>()
.unwrap();
if let Some(user_agent) = app.value_of("user-agent") {
options.user_agent = Some(user_agent.to_string());
} else {
options.user_agent = Some(DEFAULT_USER_AGENT.to_string());
}
options.unwrap_noscript = app.is_present("unwrap-noscript");
options.no_video = app.is_present("no-video");
options.no_color =
env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr);
if let Some(term) = env::var_os(ENV_VAR_TERM) {
if term == "dumb" {
options.no_color = true;
}
}
options
}
}

520
src/tests/html.rs Normal file
View File

@ -0,0 +1,520 @@
use crate::html::{
get_node_name, get_parent_node, html_to_dom, is_icon, stringify_document, walk_and_embed_assets,
};
use html5ever::rcdom::{Handle, NodeData};
use html5ever::serialize::{serialize, SerializeOpts};
use reqwest::blocking::Client;
use std::collections::HashMap;
#[test]
fn test_is_icon() {
assert_eq!(is_icon("icon"), true);
assert_eq!(is_icon("Shortcut Icon"), true);
assert_eq!(is_icon("ICON"), true);
assert_eq!(is_icon("mask-icon"), true);
assert_eq!(is_icon("fluid-icon"), true);
assert_eq!(is_icon("stylesheet"), false);
assert_eq!(is_icon(""), false);
}
#[test]
fn test_get_parent_node_name() {
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
let dom = html_to_dom(&html);
let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) {
*i += 1;
match &node.data {
NodeData::Document => {
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
NodeData::Element { ref name, .. } => {
let node_name = name.local.as_ref().to_string();
let parent = get_parent_node(node);
let parent_node_name = get_node_name(&parent);
if node_name == "head" || node_name == "body" {
assert_eq!(parent_node_name, "html");
} else if node_name == "div" {
assert_eq!(parent_node_name, "body");
} else if node_name == "p" {
assert_eq!(parent_node_name, "div");
}
println!("{}", node_name);
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
_ => (),
};
}
test_walk(&dom.document, &mut count);
assert_eq!(count, 7);
}
#[test]
fn test_walk_and_embed_assets() {
let cache = &mut HashMap::new();
let html = "<div><P></P></div>";
let dom = html_to_dom(&html);
let url = "http://localhost";
let opt_no_css: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new();
walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><p></p></div></body></html>"
);
}
#[test]
fn test_walk_and_embed_assets_ensure_no_recursive_iframe() {
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
let dom = html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new();
walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
);
}
#[test]
fn test_walk_and_embed_assets_no_css() {
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
<style>html{background-color: #000;}</style>\
<div style=\"display: none;\"></div>";
let dom = html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = true;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new();
walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head>\
<link rel=\"stylesheet\" href=\"\">\
<style></style>\
</head>\
<body>\
<div></div>\
</body>\
</html>"
);
}
#[test]
fn test_walk_and_embed_assets_no_images() {
let html = "<link rel=\"icon\" href=\"favicon.ico\">\
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
let dom = html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = true;
let opt_silent = true;
let client = Client::new();
walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head>\
<link rel=\"icon\" href=\"\">\
</head>\
<body>\
<div>\
<img src=\"data:image/png;base64,\
iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0\
lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\">\
</div>\
</body>\
</html>"
);
}
#[test]
fn test_walk_and_embed_assets_no_frames() {
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
let dom = html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new();
walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><iframe src=\"\"></iframe></body></html>"
);
}
#[test]
fn test_walk_and_embed_assets_no_js() {
let html = "<div onClick=\"void(0)\">\
<script src=\"http://localhost/assets/some.js\"></script>\
<script>alert(1)</script>\
</div>";
let dom = html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = true;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new();
walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><script src=\"\"></script>\
<script></script></div></body></html>"
);
}
#[test]
fn test_walk_and_embed_with_no_integrity() {
let html = "<title>No integrity</title>\
<link integrity=\"sha384-...\" rel=\"something\"/>\
<script integrity=\"sha384-...\" src=\"some.js\"></script>";
let dom = html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let client = Client::new();
let opt_no_css: bool = true;
let opt_no_frames: bool = true;
let opt_no_js: bool = true;
let opt_no_images: bool = true;
let opt_silent = true;
walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head><title>No integrity</title><link rel=\"something\"><script src=\"\"></script></head>\
<body></body>\
</html>"
);
}
#[test]
fn test_stringify_document() {
let html = "<div><script src=\"some.js\"></script></div>";
let dom = html_to_dom(&html);
let opt_no_css: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = false;
assert_eq!(
stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
);
}
#[test]
fn test_stringify_document_isolate() {
let html = "<title>Isolated document</title>\
<link rel=\"something\" href=\"some.css\" />\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<div><script src=\"some.js\"></script></div>";
let dom = html_to_dom(&html);
let opt_no_css: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = true;
assert_eq!(
stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
"<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
<title>Isolated document</title>\
<link rel=\"something\" href=\"some.css\">\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
</head>\
<body>\
<div>\
<script src=\"some.js\"></script>\
</div>\
</body>\
</html>"
);
}
#[test]
fn test_stringify_document_no_css() {
let html = "<!doctype html>\
<title>Unstyled document</title>\
<link rel=\"stylesheet\" href=\"main.css\"/>\
<div style=\"display: none;\"></div>";
let dom = html_to_dom(&html);
let opt_no_css: bool = true;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = false;
assert_eq!(
stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
<title>Unstyled document</title>\
<link rel=\"stylesheet\" href=\"main.css\">\
</head>\
<body><div style=\"display: none;\"></div></body>\
</html>"
);
}
#[test]
fn test_stringify_document_no_frames() {
let html = "<!doctype html>\
<title>Frameless document</title>\
<link rel=\"something\"/>\
<div><script src=\"some.js\"></script></div>";
let dom = html_to_dom(&html);
let opt_no_css: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = false;
assert_eq!(
stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
<title>Frameless document</title>\
<link rel=\"something\">\
</head>\
<body><div><script src=\"some.js\"></script></div></body>\
</html>"
);
}
#[test]
fn test_stringify_document_isolate_no_frames_no_js_no_css_no_images() {
let html = "<!doctype html>\
<title>no-frame no-css no-js no-image isolated document</title>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\
<div>\
<script src=\"some.js\"></script>\
<img style=\"width: 100%;\" src=\"some.png\" />\
<iframe src=\"some.html\"></iframe>\
</div>";
let dom = html_to_dom(&html);
let opt_isolate: bool = true;
let opt_no_css: bool = true;
let opt_no_frames: bool = true;
let opt_no_js: bool = true;
let opt_no_images: bool = true;
assert_eq!(
stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src \'unsafe-inline\' data:; style-src \'none\'; frame-src \'none\';child-src \'none\'; script-src \'none\'; img-src data:;\"></meta>\
<title>no-frame no-css no-js no-image isolated document</title>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\
</head>\
<body>\
<div>\
<script src=\"some.js\"></script>\
<img style=\"width: 100%;\" src=\"some.png\">\
<iframe src=\"some.html\"></iframe>\
</div>\
</body>\
</html>"
);
}

25
src/tests/http.rs Normal file
View File

@ -0,0 +1,25 @@
use crate::http::retrieve_asset;
use reqwest::blocking::Client;
use std::collections::HashMap;
#[test]
fn test_retrieve_asset() {
let cache = &mut HashMap::new();
let client = Client::new();
let (data, final_url) =
retrieve_asset(cache, &client, "data:text/html;base64,...", true, "", false).unwrap();
assert_eq!(&data, "data:text/html;base64,...");
assert_eq!(&final_url, "data:text/html;base64,...");
let (data, final_url) = retrieve_asset(
cache,
&client,
"data:text/html;base64,...",
true,
"image/png",
false,
)
.unwrap();
assert_eq!(&data, "data:text/html;base64,...");
assert_eq!(&final_url, "data:text/html;base64,...");
}

13
src/tests/js.rs Normal file
View File

@ -0,0 +1,13 @@
use crate::js::attr_is_event_handler;
#[test]
fn test_attr_is_event_handler() {
// succeeding
assert!(attr_is_event_handler("onBlur"));
assert!(attr_is_event_handler("onclick"));
assert!(attr_is_event_handler("onClick"));
// failing
assert!(!attr_is_event_handler("href"));
assert!(!attr_is_event_handler(""));
assert!(!attr_is_event_handler("class"));
}

4
src/tests/mod.rs Normal file
View File

@ -0,0 +1,4 @@
mod html;
mod http;
mod js;
mod utils;

177
src/tests/utils.rs Normal file
View File

@ -0,0 +1,177 @@
use crate::utils::{
clean_url, data_to_dataurl, detect_mimetype, is_data_url, is_valid_url, resolve_url,
url_has_protocol,
};
use url::ParseError;
#[test]
fn test_data_to_dataurl() {
let mime = "application/javascript";
let data = "var word = 'hello';\nalert(word);\n";
let datauri = data_to_dataurl(mime, data.as_bytes());
assert_eq!(
&datauri,
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
);
}
#[test]
fn test_detect_mimetype() {
// image
assert_eq!(detect_mimetype(b"GIF87a"), "image/gif");
assert_eq!(detect_mimetype(b"GIF89a"), "image/gif");
assert_eq!(detect_mimetype(b"\xFF\xD8\xFF"), "image/jpeg");
assert_eq!(detect_mimetype(b"\x89PNG\x0D\x0A\x1A\x0A"), "image/png");
assert_eq!(detect_mimetype(b"<?xml "), "image/svg+xml");
assert_eq!(detect_mimetype(b"<svg "), "image/svg+xml");
assert_eq!(detect_mimetype(b"RIFF....WEBPVP8 "), "image/webp");
assert_eq!(detect_mimetype(b"\x00\x00\x01\x00"), "image/x-icon");
// audio
assert_eq!(detect_mimetype(b"ID3"), "audio/mpeg");
assert_eq!(detect_mimetype(b"\xFF\x0E"), "audio/mpeg");
assert_eq!(detect_mimetype(b"\xFF\x0F"), "audio/mpeg");
assert_eq!(detect_mimetype(b"OggS"), "audio/ogg");
assert_eq!(detect_mimetype(b"RIFF....WAVEfmt "), "audio/wav");
assert_eq!(detect_mimetype(b"fLaC"), "audio/x-flac");
// video
assert_eq!(detect_mimetype(b"RIFF....AVI LIST"), "video/avi");
assert_eq!(detect_mimetype(b"....ftyp"), "video/mp4");
assert_eq!(detect_mimetype(b"\x00\x00\x01\x0B"), "video/mpeg");
assert_eq!(detect_mimetype(b"....moov"), "video/quicktime");
assert_eq!(detect_mimetype(b"\x1A\x45\xDF\xA3"), "video/webm");
}
#[test]
fn test_url_has_protocol() {
// succeeding
assert_eq!(
url_has_protocol("mailto:somebody@somewhere.com?subject=hello"),
true
);
assert_eq!(url_has_protocol("tel:5551234567"), true);
assert_eq!(
url_has_protocol("ftp:user:password@some-ftp-server.com"),
true
);
assert_eq!(url_has_protocol("javascript:void(0)"), true);
assert_eq!(url_has_protocol("http://news.ycombinator.com"), true);
assert_eq!(url_has_protocol("https://github.com"), true);
assert_eq!(
url_has_protocol("MAILTO:somebody@somewhere.com?subject=hello"),
true
);
// failing
assert_eq!(
url_has_protocol("//some-hostname.com/some-file.html"),
false
);
assert_eq!(url_has_protocol("some-hostname.com/some-file.html"), false);
assert_eq!(url_has_protocol("/some-file.html"), false);
assert_eq!(url_has_protocol(""), false);
}
#[test]
fn test_is_valid_url() {
// succeeding
assert!(is_valid_url("https://www.rust-lang.org/"));
assert!(is_valid_url("http://kernel.org"));
// failing
assert!(!is_valid_url("//kernel.org"));
assert!(!is_valid_url("./index.html"));
assert!(!is_valid_url("some-local-page.htm"));
assert!(!is_valid_url("ftp://1.2.3.4/www/index.html"));
assert!(!is_valid_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
#[test]
fn test_resolve_url() -> Result<(), ParseError> {
let resolved_url = resolve_url("https://www.kernel.org", "../category/signatures.html")?;
assert_eq!(
resolved_url.as_str(),
"https://www.kernel.org/category/signatures.html"
);
let resolved_url = resolve_url("https://www.kernel.org", "category/signatures.html")?;
assert_eq!(
resolved_url.as_str(),
"https://www.kernel.org/category/signatures.html"
);
let resolved_url = resolve_url(
"saved_page.htm",
"https://www.kernel.org/category/signatures.html",
)?;
assert_eq!(
resolved_url.as_str(),
"https://www.kernel.org/category/signatures.html"
);
let resolved_url = resolve_url(
"https://www.kernel.org",
"//www.kernel.org/theme/images/logos/tux.png",
)?;
assert_eq!(
resolved_url.as_str(),
"https://www.kernel.org/theme/images/logos/tux.png"
);
let resolved_url = resolve_url(
"https://www.kernel.org",
"//another-host.org/theme/images/logos/tux.png",
)?;
assert_eq!(
resolved_url.as_str(),
"https://another-host.org/theme/images/logos/tux.png"
);
let resolved_url = resolve_url(
"https://www.kernel.org/category/signatures.html",
"/theme/images/logos/tux.png",
)?;
assert_eq!(
resolved_url.as_str(),
"https://www.kernel.org/theme/images/logos/tux.png"
);
let resolved_url = resolve_url(
"https://www.w3schools.com/html/html_iframe.asp",
"default.asp",
)?;
assert_eq!(
resolved_url.as_str(),
"https://www.w3schools.com/html/default.asp"
);
Ok(())
}
#[test]
fn test_is_data_url() {
// succeeding
assert!(
is_data_url("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
.unwrap_or(false)
);
// failing
assert!(!is_data_url("https://kernel.org").unwrap_or(false));
assert!(!is_data_url("//kernel.org").unwrap_or(false));
assert!(!is_data_url("").unwrap_or(false));
}
#[test]
fn test_clean_url() {
assert_eq!(
clean_url("https://somewhere.com/font.eot#iefix"),
"https://somewhere.com/font.eot"
);
assert_eq!(
clean_url("https://somewhere.com/font.eot#"),
"https://somewhere.com/font.eot"
);
assert_eq!(
clean_url("https://somewhere.com/font.eot?#"),
"https://somewhere.com/font.eot"
);
}

View File

@ -1,82 +0,0 @@
use base64;
use percent_encoding::percent_decode_str;
use url::Url;
use crate::utils::{detect_media_type, parse_content_type};
pub const EMPTY_IMAGE_DATA_URL: &'static str = "data:image/png;base64,\
iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII=";
pub fn clean_url(url: Url) -> Url {
let mut url = url.clone();
// Clear fragment (if any)
url.set_fragment(None);
url
}
pub fn create_data_url(media_type: &str, charset: &str, data: &[u8], final_asset_url: &Url) -> Url {
// TODO: move this block out of this function
let media_type: String = if media_type.is_empty() {
detect_media_type(data, &final_asset_url)
} else {
media_type.to_string()
};
let mut data_url: Url = Url::parse("data:,").unwrap();
let c: String =
if !charset.trim().is_empty() && !charset.trim().eq_ignore_ascii_case("US-ASCII") {
format!(";charset={}", charset.trim())
} else {
"".to_string()
};
data_url.set_path(format!("{}{};base64,{}", media_type, c, base64::encode(data)).as_str());
data_url
}
pub fn is_url_and_has_protocol(input: &str) -> bool {
match Url::parse(&input) {
Ok(parsed_url) => {
return parsed_url.scheme().len() > 0;
}
Err(_) => {
return false;
}
}
}
pub fn parse_data_url(url: &Url) -> (String, String, Vec<u8>) {
let path: String = url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len());
// Split data URL into meta data and raw data
let content_type: String = path.chars().take(comma_loc).collect();
let data: String = path.chars().skip(comma_loc + 1).collect();
// Parse meta data
let (media_type, charset, is_base64) = parse_content_type(&content_type);
// Parse raw data into vector of bytes
let text: String = percent_decode_str(&data).decode_utf8_lossy().to_string();
let blob: Vec<u8> = if is_base64 {
base64::decode(&text).unwrap_or(vec![])
} else {
text.as_bytes().to_vec()
};
(media_type, charset, blob)
}
pub fn resolve_url(from: &Url, to: &str) -> Url {
match Url::parse(&to) {
Ok(parsed_url) => parsed_url,
Err(_) => match from.join(to) {
Ok(joined) => joined,
Err(_) => Url::parse("data:,").unwrap(),
},
}
}

View File

@ -1,21 +1,54 @@
use crate::http::retrieve_asset;
use base64::encode;
use regex::Regex;
use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE;
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use url::Url;
use url::{ParseError, Url};
use crate::opts::Options;
use crate::url::{clean_url, parse_data_url};
/// This monster of a regex is used to match any kind of URL found in CSS.
///
/// There are roughly three different categories that a found URL could fit
/// into:
/// - Font [found after a src: property in an @font-family rule]
/// - Stylesheet [denoted by an @import before the url
/// - Image [covers all other uses of the url() function]
///
/// This regex aims to extract the following information:
/// - What type of URL is it (font/image/css)
/// - Where is the part that needs to be replaced (incl any wrapping quotes)
/// - What is the URL (excl any wrapping quotes)
///
/// Essentially, the regex can be broken down into two parts:
///
/// `(?:(?P<import>@import)|(?P<font>src\s*:)\s+)?`
/// This matches the precursor to a font or CSS URL, and fills in a match under
/// either `<import>` (if it's a CSS URL) or `<font>` (if it's a font).
/// Determining whether or not it's an image can be done by the negation of both
/// of these. Either zero or one of these can match.
///
/// `url\((?P<to_repl>['"]?(?P<url>[^"'\)]+)['"]?)\)`
/// This matches the actual URL part of the url(), and must always match. It also
/// sets `<to_repl>` and `<url>` which correspond to everything within
/// `url(...)` and a usable URL, respectively.
///
/// Note, however, that this does not perform any validation of the found URL.
/// Malformed CSS could lead to an invalid URL being present. It is therefore
/// recomended that the URL gets manually validated.
const CSS_URL_REGEX_STR: &str = r###"(?:(?:(?P<stylesheet>@import)|(?P<font>src\s*:))\s+)?url\((?P<to_repl>['"]?(?P<url>[^"'\)]+)['"]?)\)"###;
const ANSI_COLOR_RED: &'static str = "\x1b[31m";
const ANSI_COLOR_RESET: &'static str = "\x1b[0m";
const MAGIC: [[&[u8]; 2]; 18] = [
lazy_static! {
static ref HAS_PROTOCOL: Regex = Regex::new(r"^[a-z0-9]+:").unwrap();
static ref REGEX_URL: Regex = Regex::new(r"^https?://").unwrap();
static ref REGEX_CSS_URL: Regex = Regex::new(CSS_URL_REGEX_STR).unwrap();
}
const MAGIC: [[&[u8]; 2]; 19] = [
// Image
[b"GIF87a", b"image/gif"],
[b"GIF89a", b"image/gif"],
[b"\xFF\xD8\xFF", b"image/jpeg"],
[b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
[b"<?xml ", b"image/svg+xml"],
[b"<svg ", b"image/svg+xml"],
[b"RIFF....WEBPVP8 ", b"image/webp"],
[b"\x00\x00\x01\x00", b"image/x-icon"],
@ -33,367 +66,142 @@ const MAGIC: [[&[u8]; 2]; 18] = [
[b"....moov", b"video/quicktime"],
[b"\x1A\x45\xDF\xA3", b"video/webm"],
];
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
"application/javascript",
"application/json",
"image/svg+xml",
];
pub fn detect_media_type(data: &[u8], url: &Url) -> String {
// At first attempt to read file's header
for magic_item in MAGIC.iter() {
if data.starts_with(magic_item[0]) {
return String::from_utf8(magic_item[1].to_vec()).unwrap();
}
}
// If header didn't match any known magic signatures,
// try to guess media type from file name
let parts: Vec<&str> = url.path().split('/').collect();
detect_media_type_by_file_name(parts.last().unwrap())
}
pub fn detect_media_type_by_file_name(filename: &str) -> String {
let filename_lowercased: &str = &filename.to_lowercase();
let parts: Vec<&str> = filename_lowercased.split('.').collect();
let mime: &str = match parts.last() {
Some(v) => match *v {
"avi" => "video/avi",
"bmp" => "image/bmp",
"css" => "text/css",
"flac" => "audio/flac",
"gif" => "image/gif",
"htm" | "html" => "text/html",
"ico" => "image/x-icon",
"jpeg" | "jpg" => "image/jpeg",
"js" => "application/javascript",
"json" => "application/json",
"mp3" => "audio/mpeg",
"mp4" | "m4v" => "video/mp4",
"ogg" => "audio/ogg",
"ogv" => "video/ogg",
"pdf" => "application/pdf",
"png" => "image/png",
"svg" => "image/svg+xml",
"swf" => "application/x-shockwave-flash",
"tif" | "tiff" => "image/tiff",
"txt" => "text/plain",
"wav" => "audio/wav",
"webp" => "image/webp",
"woff" => "font/woff",
"woff2" => "font/woff2",
"xml" => "text/xml",
&_ => "",
},
None => "",
};
mime.to_string()
}
pub fn domain_is_within_domain(domain: &str, domain_to_match_against: &str) -> bool {
if domain_to_match_against.len() == 0 {
return false;
}
if domain_to_match_against == "." {
return true;
}
let domain_partials: Vec<&str> = domain.trim_end_matches(".").rsplit(".").collect();
let domain_to_match_against_partials: Vec<&str> = domain_to_match_against
.trim_end_matches(".")
.rsplit(".")
.collect();
let domain_to_match_against_starts_with_a_dot = domain_to_match_against.starts_with(".");
let mut i: usize = 0;
let l: usize = std::cmp::max(
domain_partials.len(),
domain_to_match_against_partials.len(),
);
let mut ok: bool = true;
while i < l {
// Exit and return false if went out of bounds of domain to match against, and it didn't start with a dot
if !domain_to_match_against_starts_with_a_dot
&& domain_to_match_against_partials.len() < i + 1
{
ok = false;
break;
}
let domain_partial = if domain_partials.len() < i + 1 {
""
} else {
domain_partials.get(i).unwrap()
};
let domain_to_match_against_partial = if domain_to_match_against_partials.len() < i + 1 {
""
} else {
domain_to_match_against_partials.get(i).unwrap()
};
let parts_match = domain_to_match_against_partial.eq_ignore_ascii_case(domain_partial);
if !parts_match && domain_to_match_against_partial.len() != 0 {
ok = false;
break;
}
i += 1;
}
ok
}
pub fn indent(level: u32) -> String {
let mut result: String = String::new();
let mut l: u32 = level;
while l > 0 {
result += " ";
l -= 1;
}
result
}
pub fn is_plaintext_media_type(media_type: &str) -> bool {
media_type.to_lowercase().as_str().starts_with("text/")
|| PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
}
pub fn parse_content_type(content_type: &str) -> (String, String, bool) {
let mut media_type: String = "text/plain".to_string();
let mut charset: String = "US-ASCII".to_string();
let mut is_base64: bool = false;
// Parse meta data
let content_type_items: Vec<&str> = content_type.split(';').collect();
let mut i: i8 = 0;
for item in &content_type_items {
if i == 0 {
if item.trim().len() > 0 {
media_type = item.trim().to_string();
}
} else {
if item.trim().eq_ignore_ascii_case("base64") {
is_base64 = true;
} else if item.trim().starts_with("charset=") {
charset = item.trim().chars().skip(8).collect();
}
}
i += 1;
}
(media_type, charset, is_base64)
}
pub fn retrieve_asset(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &Url,
url: &Url,
options: &Options,
depth: u32,
) -> Result<(Vec<u8>, Url, String, String), reqwest::Error> {
if url.scheme() == "data" {
let (media_type, charset, data) = parse_data_url(url);
Ok((data, url.clone(), media_type, charset))
} else if url.scheme() == "file" {
// Check if parent_url is also a file: URL (if not, then we don't embed the asset)
if parent_url.scheme() != "file" {
if !options.silent {
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
"Security Error",
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error
client.get("").send()?;
}
let path_buf: PathBuf = url.to_file_path().unwrap().clone();
let path: &Path = path_buf.as_path();
if path.exists() {
if path.is_dir() {
if !options.silent {
eprintln!(
"{}{}{} (is a directory){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error
Err(client.get("").send().unwrap_err())
} else {
if !options.silent {
eprintln!("{}{}", indent(depth).as_str(), &url);
}
let file_blob: Vec<u8> = fs::read(&path).expect("Unable to read file");
Ok((
file_blob.clone(),
url.clone(),
detect_media_type(&file_blob, url),
"".to_string(),
))
}
} else {
if !options.silent {
eprintln!(
"{}{}{} (not found){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error
Err(client.get("").send().unwrap_err())
}
pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
let mimetype = if mime.is_empty() {
detect_mimetype(data)
} else {
let cache_key: String = clean_url(url.clone()).as_str().to_string();
mime.to_string()
};
format!("data:{};base64,{}", mimetype, encode(data))
}
if cache.contains_key(&cache_key) {
// URL is in cache, we get and return it
if !options.silent {
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
}
Ok((
cache.get(&cache_key).unwrap().to_vec(),
url.clone(),
"".to_string(),
"".to_string(),
))
} else {
if let Some(domains) = &options.domains {
let domain_matches = domains
.iter()
.any(|d| domain_is_within_domain(url.host_str().unwrap(), &d.trim()));
if (options.exclude_domains && domain_matches)
|| (!options.exclude_domains && !domain_matches)
{
return Err(client.get("").send().unwrap_err());
}
}
// URL not in cache, we retrieve the file
match client.get(url.as_str()).send() {
Ok(response) => {
if !options.ignore_errors && response.status() != reqwest::StatusCode::OK {
if !options.silent {
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
response.status(),
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error
return Err(client.get("").send().unwrap_err());
}
let response_url: Url = response.url().clone();
if !options.silent {
if url.as_str() == response_url.as_str() {
eprintln!("{}{}", indent(depth).as_str(), &url);
} else {
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &response_url);
}
}
let new_cache_key: String = clean_url(response_url.clone()).to_string();
// Attempt to obtain media type and charset by reading Content-Type header
let content_type: &str = response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
.unwrap_or("");
let (media_type, charset, _is_base64) = parse_content_type(&content_type);
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
match response.bytes() {
Ok(b) => {
data = b.to_vec();
}
Err(error) => {
if !options.silent {
eprintln!(
"{}{}{}{}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
error,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
}
}
// Add retrieved resource to cache
cache.insert(new_cache_key, data.clone());
// Return
Ok((data, response_url, media_type, charset))
}
Err(error) => {
if !options.silent {
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
error,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
Err(client.get("").send().unwrap_err())
}
}
pub fn detect_mimetype(data: &[u8]) -> String {
for item in MAGIC.iter() {
if data.starts_with(item[0]) {
return String::from_utf8(item[1].to_vec()).unwrap();
}
}
"".to_owned()
}
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
HAS_PROTOCOL.is_match(url.as_ref().to_lowercase().as_str())
}
pub fn is_data_url<T: AsRef<str>>(url: T) -> Result<bool, ParseError> {
Url::parse(url.as_ref()).and_then(|u| Ok(u.scheme() == "data"))
}
pub fn is_valid_url<T: AsRef<str>>(path: T) -> bool {
REGEX_URL.is_match(path.as_ref())
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_valid_url(to.as_ref()) {
to.as_ref().to_string()
} else {
Url::parse(from.as_ref())?
.join(to.as_ref())?
.as_ref()
.to_string()
};
Ok(result)
}
pub fn resolve_css_imports(
cache: &mut HashMap<String, String>,
client: &Client,
css_string: &str,
as_dataurl: bool,
href: &str,
opt_no_images: bool,
opt_silent: bool,
) -> String {
let mut resolved_css = String::from(css_string);
for link in REGEX_CSS_URL.captures_iter(&css_string) {
let target_link = link.name("url").unwrap().as_str();
// Determine the type of link
let is_stylesheet = link.name("stylesheet").is_some();
let is_font = link.name("font").is_some();
let is_image = !is_stylesheet && !is_font;
// Generate absolute URL for content
let embedded_url = match resolve_url(href, target_link) {
Ok(url) => url,
Err(_) => continue, // Malformed URL
};
// Download the asset. If it's more CSS, resolve that too
let content = if is_stylesheet {
// The link is an @import link
retrieve_asset(
cache,
client,
&embedded_url,
false, // Formating as data URL will be done later
"text/css", // Expect CSS
opt_silent,
)
.map(|(content, _)| {
resolve_css_imports(
cache,
client,
&content,
true, // Finally, convert to a dataurl
&embedded_url,
opt_no_images,
opt_silent,
)
})
} else if (is_image && !opt_no_images) || is_font {
// The link is some other, non-@import link
retrieve_asset(
cache,
client,
&embedded_url,
true, // Format as data URL
"", // Unknown MIME type
opt_silent,
)
.map(|(a, _)| a)
} else {
// If it's a datatype that has been opt_no'd out of, replace with
// absolute URL
Ok(embedded_url.clone())
}
.unwrap_or_else(|e| {
eprintln!("Warning: {}", e);
// If failed to resolve, replace with absolute URL
embedded_url
});
let replacement = format!("\"{}\"", &content);
let dest = link.name("to_repl").unwrap();
let offset = resolved_css.len() - css_string.len();
let target_range = (dest.start() + offset)..(dest.end() + offset);
resolved_css.replace_range(target_range, &replacement);
}
if as_dataurl {
data_to_dataurl("text/css", resolved_css.as_bytes())
} else {
resolved_css
}
}
pub fn clean_url<T: AsRef<str>>(url: T) -> String {
let mut result = Url::parse(url.as_ref()).unwrap();
// Clear fragment
result.set_fragment(None);
// Get rid of stray question mark
if result.query() == Some("") {
result.set_query(None);
}
result.to_string()
}

View File

@ -1,19 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Local HTML file</title>
<link href="local-style.css" rel="stylesheet" type="text/css" />
<link href="local-style-does-not-exist.css" rel="stylesheet" type="text/css" />
</head>
<body>
<img src="monolith.png" alt="" />
<a href="//local-file.html">Tricky href</a>
<a href="https://github.com/Y2Z/monolith">Remote URL</a>
<script src="local-script.js"></script>
</body>
</html>

View File

@ -1,2 +0,0 @@
document.body.style.backgroundColor = "green";
document.body.style.color = "red";

View File

@ -1,4 +0,0 @@
body {
background-color: #000;
color: #fff;
}

View File

@ -1,11 +0,0 @@
<style>
@charset 'UTF-8';
@import 'style.css';
@import url(style.css);
@import url('style.css');
</style>

View File

@ -1 +0,0 @@
body{background-color:#000;color:#fff}

View File

@ -1,23 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Attempt to import CSS via data URL asset</title>
<style>
body {
background-color: white;
color: black;
}
</style>
<link href="data:text/css;base64,QGltcG9ydCAic3R5bGUuY3NzIjsK" rel="stylesheet" type="text/css" />
</head>
<body>
<p>If you see pink background with white foreground then were in trouble</p>
</body>
</html>

View File

@ -1,4 +0,0 @@
body {
background-color: pink;
color: white;
}

View File

@ -1,17 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<title>Local HTML file</title>
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-IWaCTORHkRhOWzcZeILSVmV6V6gPTHgNem6o6rsFAyaKTieDFkeeMrWjtO0DuWrX3bqZY46CVTZXUu0mia0qXQ==" crossorigin="anonymous" />
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-vWBzl4NE9oIg8NFOPAyOZbaam0UXWr6aDHPaY2kodSzAFl+mKoj/RMNc6C31NDqK4mE2i68IWxYWqWJPLCgPOw==" crossorigin="anonymous" />
</head>
<body>
<p>This page should have black background and white foreground, but only when served via http: (not via file:)</p>
<script src="script.js" integrity="sha256-ecrEsYh3+ICCX8BCrNSotXgI5534282JwJjx8Q9ZWLc="></script>
<script src="script.js" integrity="sha256-6idk9dK0bOkVdG7Oz4/0YLXSJya8xZHqbRZKMhYrt6o="></script>
</body>
</html>

View File

@ -1,3 +0,0 @@
function noop() {
console.log("monolith");
}

View File

@ -1,4 +0,0 @@
body {
background-color: #000;
color: #FFF;
}

View File

@ -1,5 +0,0 @@
<svg version="1.1" baseProfile="full" width="300" height="200" xmlns="http://www.w3.org/2000/svg">
<rect width="100%" height="100%" fill="red" />
<circle cx="150" cy="100" r="80" fill="green" />
<text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text>
</svg>

Before

Width:  |  Height:  |  Size: 296 B

View File

@ -1 +0,0 @@
<body><noscript><img src="image.svg" /></noscript></body>

View File

@ -1 +0,0 @@
<body><noscript><h1>JS is not active</h1><noscript><img src="image.svg" /></noscript></noscript></body>

View File

@ -1 +0,0 @@
<body><noscript><script>alert(1);</script><img src="image.svg" /></noscript></body>

View File

@ -1,5 +0,0 @@
<svg version="1.1" baseProfile="full" width="300" height="200" xmlns="http://www.w3.org/2000/svg">
<rect width="100%" height="100%" fill="red" />
<circle cx="150" cy="100" r="80" fill="green" />
<text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text>
</svg>

Before

Width:  |  Height:  |  Size: 296 B

View File

@ -1 +0,0 @@
<div style="background-image: url('image.svg')"></div>

View File

@ -1,9 +0,0 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=GB2312"/>
<title>近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网 </title>
</head>
<body>
<h1>近七成人减少线下需求 银行数字化转型提速</h1>
</body>
</html>

View File

@ -1,8 +0,0 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
&copy; Some Company
</body>
</html>

View File

@ -1,115 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
#[test]
fn add_new_when_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-b")
.arg("http://localhost:8000/")
.arg("data:text/html,Hello%2C%20World!")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain newly added base URL
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<base href=\"http://localhost:8000/\"></base>\
</head><body>Hello, World!</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn keep_existing_when_none_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain newly added base URL
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<base href=\"http://localhost:8000/\">\
</head><body>Hello, World!</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn override_existing_when_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-b")
.arg("http://localhost/")
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain newly added base URL
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<base href=\"http://localhost/\">\
</head><body>Hello, World!</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn set_existing_to_empty_when_empty_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-b")
.arg("")
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain newly added base URL
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<base href=\"\">\
</head><body>Hello, World!</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
}

View File

@ -1,144 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::fs;
use std::path::Path;
use std::process::{Command, Stdio};
use url::Url;
#[test]
fn print_help_information() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("-h").output().unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain program name, version, and usage information
// TODO
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn print_version() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("-V").output().unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain program name and version
assert_eq!(
String::from_utf8_lossy(&out.stdout),
format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn stdin_target_input() {
let mut echo = Command::new("echo")
.arg("Hello from STDIN")
.stdout(Stdio::piped())
.spawn()
.unwrap();
let echo_out = echo.stdout.take().unwrap();
echo.wait().unwrap();
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
cmd.stdin(echo_out);
let out = cmd.arg("-M").arg("-").output().unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML created out of STDIN
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head></head><body>Hello from STDIN\n</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn css_import_string() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/css/index.html");
let path_css: &Path = Path::new("tests/_data_/css/style.css");
assert!(path_html.is_file());
assert!(path_css.is_file());
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
// STDERR should list files that got retrieved
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n \
{file_url_css}\n \
{file_url_css}\n \
{file_url_css}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_css = Url::from_file_path(fs::canonicalize(&path_css).unwrap()).unwrap(),
)
);
// STDOUT should contain embedded CSS url()'s
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head><style>\n\n @charset \"UTF-8\";\n\n @import \"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\";\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n</style>\n</head><body></body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
#[test]
fn bad_input_empty_target() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("").output().unwrap();
// STDERR should contain error description
assert_eq!(
String::from_utf8_lossy(&out.stderr),
"No target specified\n"
);
// STDOUT should be empty
assert_eq!(String::from_utf8_lossy(&out.stdout), "");
// Exit code should be 1
out.assert().code(1);
}
}

View File

@ -1,233 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn isolate_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-I")
.arg("data:text/html,Hello%2C%20World!")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain isolated HTML
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:;\"></meta>\
</head><body>Hello, World!</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn remove_css_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-c")
.arg("data:text/html,<style>body{background-color:pink}</style>Hello")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML with no CSS
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
<style></style>\
</head><body>Hello</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn remove_fonts_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-F")
.arg("data:text/html,<style>@font-face { font-family: myFont; src: url(font.woff); }</style>Hi")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML with no web fonts
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"font-src 'none';\"></meta>\
<style></style>\
</head><body>Hi</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn remove_frames_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-f")
.arg("data:text/html,<iframe src=\"https://duckduckgo.com\"></iframe>Hi")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML with no iframes
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
</head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn remove_images_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-i")
.arg("data:text/html,<img src=\"https://google.com\"/>Hi")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML with no images
assert_eq!(
String::from_utf8_lossy(&out.stdout),
format!(
"<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\
</head>\
<body>\
<img src=\"{empty_image}\">\
Hi\
</body>\
</html>\n",
empty_image = EMPTY_IMAGE_DATA_URL,
)
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn remove_js_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-j")
.arg("data:text/html,<script>alert(2)</script>Hi")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML with no JS
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
<script></script></head>\
<body>Hi</body>\
</html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
#[test]
fn bad_input_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
// STDERR should contain error description
assert_eq!(
String::from_utf8_lossy(&out.stderr),
"Unsupported document media type\n"
);
// STDOUT should contain HTML
assert_eq!(String::from_utf8_lossy(&out.stdout), "");
// Exit code should be 1
out.assert().code(1);
}
#[test]
fn security_disallow_local_assets_within_data_url_targets() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML with no JS in it
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head><script src=\"data:application/javascript;base64,\"></script></head><body></body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
}

View File

@ -1,271 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::fs;
use std::path::{Path, MAIN_SEPARATOR};
use std::process::Command;
use url::Url;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn local_file_target_input_relative_target_path() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let cwd_normalized: String = env::current_dir()
.unwrap()
.to_str()
.unwrap()
.replace("\\", "/");
let out = cmd
.arg("-M")
.arg(format!(
"tests{s}_data_{s}basic{s}local-file.html",
s = MAIN_SEPARATOR
))
.output()
.unwrap();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDERR should contain list of retrieved file URLs, two missing
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file}{cwd}/tests/_data_/basic/local-file.html\n \
{file}{cwd}/tests/_data_/basic/local-style.css\n \
{file}{cwd}/tests/_data_/basic/local-style-does-not-exist.css (not found)\n \
{file}{cwd}/tests/_data_/basic/monolith.png (not found)\n \
{file}{cwd}/tests/_data_/basic/local-script.js\n\
",
file = file_url_protocol,
cwd = cwd_normalized
)
);
// STDOUT should contain HTML from the local file
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"\
<!DOCTYPE html><html lang=\"en\"><head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
</body></html>\n\
"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn local_file_target_input_absolute_target_path() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/basic/local-file.html");
let out = cmd
.arg("-M")
.arg("-Ijci")
.arg(path_html.as_os_str())
.output()
.unwrap();
// STDERR should contain only the target file
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"{file_url_html}\n",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
)
);
// STDOUT should contain HTML from the local file
assert_eq!(
String::from_utf8_lossy(&out.stdout),
format!(
"\
<!DOCTYPE html><html lang=\"en\"><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img src=\"{empty_image}\" alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script></script>\n\n\n\n\
</body></html>\n\
",
empty_image = EMPTY_IMAGE_DATA_URL
)
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn local_file_url_target_input() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let cwd_normalized: String = env::current_dir()
.unwrap()
.to_str()
.unwrap()
.replace("\\", "/");
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
let out = cmd
.arg("-M")
.arg("-cji")
.arg(format!(
"{file}{cwd}/tests/_data_/basic/local-file.html",
file = file_url_protocol,
cwd = cwd_normalized,
))
.output()
.unwrap();
// STDERR should contain list of retrieved file URLs
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"{file}{cwd}/tests/_data_/basic/local-file.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// STDOUT should contain HTML from the local file
assert_eq!(
String::from_utf8_lossy(&out.stdout),
format!(
"\
<!DOCTYPE html><html lang=\"en\"><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img src=\"{empty_image}\" alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script></script>\n\n\n\n\
</body></html>\n\
",
empty_image = EMPTY_IMAGE_DATA_URL
)
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn embed_file_url_local_asset_within_style_attribute() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/svg/index.html");
let path_svg: &Path = Path::new("tests/_data_/svg/image.svg");
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
// STDERR should list files that got retrieved
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// STDOUT should contain HTML with date URL for background-image in it
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head></head><body><div style=\"background-image: url(&quot;data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=&quot;)\"></div>\n</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn discard_integrity_for_local_files() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let cwd_normalized: String = env::current_dir()
.unwrap()
.to_str()
.unwrap()
.replace("\\", "/");
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
let out = cmd
.arg("-M")
.arg("-i")
.arg(if cfg!(windows) {
format!(
"{file}{cwd}/tests/_data_/integrity/index.html",
file = file_url_protocol,
cwd = cwd_normalized,
)
} else {
format!(
"{file}{cwd}/tests/_data_/integrity/index.html",
file = file_url_protocol,
cwd = cwd_normalized,
)
})
.output()
.unwrap();
// STDERR should contain list of retrieved file URLs
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file}{cwd}/tests/_data_/integrity/index.html\n \
{file}{cwd}/tests/_data_/integrity/style.css\n \
{file}{cwd}/tests/_data_/integrity/style.css\n \
{file}{cwd}/tests/_data_/integrity/script.js\n \
{file}{cwd}/tests/_data_/integrity/script.js\n\
",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// STDOUT should contain HTML from the local file; integrity attributes should be missing
assert_eq!(
String::from_utf8_lossy(&out.stdout),
format!(
"\
<!DOCTYPE html><html lang=\"en\"><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\n \
<title>Local HTML file</title>\n \
<link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNGRkY7Cn0K\" rel=\"stylesheet\" type=\"text/css\" crossorigin=\"anonymous\">\n \
<link href=\"style.css\" rel=\"stylesheet\" type=\"text/css\" crossorigin=\"anonymous\">\n</head>\n\n<body>\n \
<p>This page should have black background and white foreground, but only when served via http: (not via file:)</p>\n \
<script src=\"data:application/javascript;base64,ZnVuY3Rpb24gbm9vcCgpIHsKICAgIGNvbnNvbGUubG9nKCJtb25vbGl0aCIpOwp9Cg==\"></script>\n \
<script src=\"script.js\"></script>\n\n\n\n\
</body></html>\n\
"
)
);
// Exit code should be 0
out.assert().code(0);
}
}

View File

@ -1,6 +0,0 @@
mod base_url;
mod basic;
mod data_url;
mod local_files;
mod noscript;
mod unusual_encodings;

View File

@ -1,170 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::fs;
use std::path::Path;
use std::process::Command;
use url::Url;
#[test]
fn parse_noscript_contents() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/noscript/index.html");
let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
// STDERR should contain target HTML and embedded SVG files
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// STDOUT should contain HTML with no CSS
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head></head><body><noscript><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"></noscript>\n</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn unwrap_noscript_contents() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/noscript/index.html");
let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
// STDERR should contain target HTML and embedded SVG files
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// STDOUT should contain HTML with no CSS
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head></head><body><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript-->\n</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn unwrap_noscript_contents_nested() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/noscript/nested.html");
let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
// STDERR should contain target HTML and embedded SVG files
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// STDOUT should contain HTML with no CSS
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head></head><body><!--noscript--><h1>JS is not active</h1><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript--><!--/noscript-->\n</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn unwrap_noscript_contents_with_script() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/noscript/script.html");
let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
// STDERR should contain target HTML and embedded SVG files
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// STDOUT should contain HTML with no CSS
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html>\
<head></head>\
<body>\
<!--noscript-->\
<img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\">\
<!--/noscript-->\n\
</body>\
</html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn unwrap_noscript_contents_attr_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-n")
.arg("data:text/html,<noscript class=\"\">test</noscript>")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain unwrapped contents of NOSCRIPT element
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head><!--noscript class=\"\"-->test<!--/noscript--></head><body></body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
}

View File

@ -1,239 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use encoding_rs::Encoding;
use std::env;
use std::path::MAIN_SEPARATOR;
use std::process::{Command, Stdio};
#[test]
fn properly_save_document_with_gb2312() {
let cwd = env::current_dir().unwrap();
let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg(format!(
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
s = MAIN_SEPARATOR
))
.output()
.unwrap();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDERR should contain only the target file
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"{file}{cwd}/tests/_data_/unusual_encodings/gb2312.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// STDOUT should contain original document without any modificatons
let s: String;
if let Some(encoding) = Encoding::for_label(b"gb2312") {
let (string, _, _) = encoding.decode(&out.stdout);
s = string.to_string();
} else {
s = String::from_utf8_lossy(&out.stdout).to_string();
}
assert_eq!(
s,
"<html>\
<head>\n \
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\">\n \
<title>线\u{3000}--·-- </title>\n\
</head>\n\
<body>\n \
<h1>线\u{3000}</h1>\n\n\n\
</body>\
</html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn properly_save_document_with_gb2312_from_stdin() {
let mut echo = Command::new("cat")
.arg(format!(
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
s = MAIN_SEPARATOR
))
.stdout(Stdio::piped())
.spawn()
.unwrap();
let echo_out = echo.stdout.take().unwrap();
echo.wait().unwrap();
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
cmd.stdin(echo_out);
let out = cmd.arg("-M").arg("-").output().unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML created out of STDIN
let s: String;
if let Some(encoding) = Encoding::for_label(b"gb2312") {
let (string, _, _) = encoding.decode(&out.stdout);
s = string.to_string();
} else {
s = String::from_utf8_lossy(&out.stdout).to_string();
}
assert_eq!(
s,
"<html>\
<head>\n \
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\">\n \
<title>线\u{3000}--·-- </title>\n\
</head>\n\
<body>\n \
<h1>线\u{3000}</h1>\n\n\n\
</body>\
</html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn properly_save_document_with_gb2312_custom_charset() {
let cwd = env::current_dir().unwrap();
let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-C")
.arg("utf8")
.arg(format!(
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
s = MAIN_SEPARATOR
))
.output()
.unwrap();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDERR should contain only the target file
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"{file}{cwd}/tests/_data_/unusual_encodings/gb2312.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// STDOUT should contain original document without any modificatons
assert_eq!(
String::from_utf8_lossy(&out.stdout).to_string(),
"<html>\
<head>\n \
<meta http-equiv=\"content-type\" content=\"text/html;charset=utf8\">\n \
<title>线\u{3000}--·-- </title>\n\
</head>\n\
<body>\n \
<h1>线\u{3000}</h1>\n\n\n\
</body>\
</html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn properly_save_document_with_gb2312_custom_charset_bad() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-C")
.arg("utf0")
.arg(format!(
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
s = MAIN_SEPARATOR
))
.output()
.unwrap();
// STDERR should contain error message
assert_eq!(
String::from_utf8_lossy(&out.stderr),
"Unknown encoding: utf0\n"
);
// STDOUT should be empty
assert_eq!(String::from_utf8_lossy(&out.stdout).to_string(), "");
// Exit code should be 1
out.assert().code(1);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use assert_cmd::prelude::*;
use std::env;
use std::path::MAIN_SEPARATOR;
use std::process::Command;
#[test]
fn change_iso88591_to_utf8_to_properly_display_html_entities() {
let cwd = env::current_dir().unwrap();
let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg(format!(
"tests{s}_data_{s}unusual_encodings{s}iso-8859-1.html",
s = MAIN_SEPARATOR
))
.output()
.unwrap();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDERR should contain only the target file
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"{file}{cwd}/tests/_data_/unusual_encodings/iso-8859-1.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// STDOUT should contain original document but with UTF-8 charset
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html>\
<head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n \
</head>\n \
<body>\n \
<EFBFBD> Some Company\n \
\n\n</body>\
</html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
}

View File

@ -1,371 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap;
use monolith::css;
use monolith::opts::Options;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn empty_input() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let options = Options::default();
assert_eq!(
css::embed_css(cache, &client, &document_url, "", &options, 0),
""
);
}
#[test]
fn trim_if_empty() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let options = Options::default();
assert_eq!(
css::embed_css(cache, &client, &document_url, "\t \t ", &options, 0,),
""
);
}
#[test]
fn style_exclude_unquoted_images() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
const STYLE: &str = "/* border: none;*/\
background-image: url(https://somewhere.com/bg.png); \
list-style: url(/assets/images/bullet.svg);\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(cache, &client, &document_url, &STYLE, &options, 0,),
format!(
"/* border: none;*/\
background-image: url(\"{empty_image}\"); \
list-style: url(\"{empty_image}\");\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)",
empty_image = EMPTY_IMAGE_DATA_URL
)
);
}
#[test]
fn style_exclude_single_quoted_images() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
const STYLE: &str = "/* border: none;*/\
background-image: url('https://somewhere.com/bg.png'); \
list-style: url('/assets/images/bullet.svg');\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(cache, &client, &document_url, &STYLE, &options, 0),
format!(
"/* border: none;*/\
background-image: url(\"{empty_image}\"); \
list-style: url(\"{empty_image}\");\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)",
empty_image = EMPTY_IMAGE_DATA_URL
)
);
}
#[test]
fn style_block() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("file:///").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
#id.class-name:not(:nth-child(3n+0)) {\n \
// border: none;\n \
background-image: url(\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\");\n\
}\n\
\n\
html > body {}";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0),
CSS
);
}
#[test]
fn attribute_selectors() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
[data-value] {
/* Attribute exists */
}
[data-value=\"foo\"] {
/* Attribute has this exact value */
}
[data-value*=\"foo\"] {
/* Attribute value contains this value somewhere in it */
}
[data-value~=\"foo\"] {
/* Attribute has this value in a space-separated list somewhere */
}
[data-value^=\"foo\"] {
/* Attribute value starts with this */
}
[data-value|=\"foo\"] {
/* Attribute value starts with this in a dash-separated list */
}
[data-value$=\"foo\"] {
/* Attribute value ends with this */
}
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0),
CSS
);
}
#[test]
fn import_string() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
@charset 'UTF-8';\n\
\n\
@import 'data:text/css,html{background-color:%23000}';\n\
\n\
@import url('data:text/css,html{color:%23fff}')\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
"\
@charset \"UTF-8\";\n\
\n\
@import \"data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9\";\n\
\n\
@import url(\"data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==\")\n\
"
);
}
#[test]
fn hash_urls() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
body {\n \
behavior: url(#default#something);\n\
}\n\
\n\
.scissorHalf {\n \
offset-path: url(#somePath);\n\
}\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS
);
}
#[test]
fn transform_percentages_and_degrees() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
div {\n \
transform: translate(-50%, -50%) rotate(-45deg);\n\
transform: translate(50%, 50%) rotate(45deg);\n\
transform: translate(+50%, +50%) rotate(+45deg);\n\
}\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS
);
}
#[test]
fn unusual_indents() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
.is\\:good:hover {\n \
color: green\n\
}\n\
\n\
#\\~\\!\\@\\$\\%\\^\\&\\*\\(\\)\\+\\=\\,\\.\\/\\\\\\'\\\"\\;\\:\\?\\>\\<\\[\\]\\{\\}\\|\\`\\# {\n \
color: black\n\
}\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS
);
}
#[test]
fn exclude_fonts() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.no_fonts = true;
options.silent = true;
const CSS: &str = "\
@font-face {\n \
font-family: 'My Font';\n \
src: url(my_font.woff);\n\
}\n\
\n\
#identifier {\n \
font-family: 'My Font' Arial\n\
}\n\
\n\
@font-face {\n \
font-family: 'My Font';\n \
src: url(my_font.woff);\n\
}\n\
\n\
div {\n \
font-family: 'My Font' Verdana\n\
}\n\
";
const CSS_OUT: &str = " \
\n\
\n\
#identifier {\n \
font-family: \"My Font\" Arial\n\
}\n\
\n \
\n\
\n\
div {\n \
font-family: \"My Font\" Verdana\n\
}\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS_OUT
);
}
#[test]
fn content() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
#language a[href=\"#translations\"]:before {\n\
content: url(data:,) \"\\A\";\n\
white-space: pre }\n\
";
const CSS_OUT: &str = "\
#language a[href=\"#translations\"]:before {\n\
content: url(\"data:text/plain;base64,\") \"\\a \";\n\
white-space: pre }\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS_OUT
);
}
#[test]
fn ie_css_hack() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
div#p>svg>foreignObject>section:not(\\9) {\n\
width: 300px;\n\
width: 500px\\9;\n\
}\n\
";
const CSS_OUT: &str = "\
div#p>svg>foreignObject>section:not(\\9) {\n\
width: 300px;\n\
width: 500px\t;\n\
}\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS_OUT
);
}
}

View File

@ -1,88 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::css;
#[test]
fn backrgound() {
assert!(css::is_image_url_prop("background"));
}
#[test]
fn backrgound_image() {
assert!(css::is_image_url_prop("background-image"));
}
#[test]
fn backrgound_image_uppercase() {
assert!(css::is_image_url_prop("BACKGROUND-IMAGE"));
}
#[test]
fn border_image() {
assert!(css::is_image_url_prop("border-image"));
}
#[test]
fn content() {
assert!(css::is_image_url_prop("content"));
}
#[test]
fn cursor() {
assert!(css::is_image_url_prop("cursor"));
}
#[test]
fn list_style() {
assert!(css::is_image_url_prop("list-style"));
}
#[test]
fn list_style_image() {
assert!(css::is_image_url_prop("list-style-image"));
}
#[test]
fn mask_image() {
assert!(css::is_image_url_prop("mask-image"));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use monolith::css;
#[test]
fn empty() {
assert!(!css::is_image_url_prop(""));
}
#[test]
fn width() {
assert!(!css::is_image_url_prop("width"));
}
#[test]
fn color() {
assert!(!css::is_image_url_prop("color"));
}
#[test]
fn z_index() {
assert!(!css::is_image_url_prop("z-index"));
}
}

View File

@ -1,2 +0,0 @@
mod embed_css;
mod is_image_url_prop;

View File

@ -1,29 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
use monolith::html;
#[test]
fn basic() {
let html = "<div>text</div>";
let mut dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head><link rel=\"icon\" href=\"I_AM_A_FAVICON_DATA_URL\"></link></head><body><div>text</div></body></html>"
);
}
}

View File

@ -1,89 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::html;
#[test]
fn empty_input_sha256() {
assert!(html::check_integrity(
"".as_bytes(),
"sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="
));
}
#[test]
fn sha256() {
assert!(html::check_integrity(
"abcdef0123456789".as_bytes(),
"sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM="
));
}
#[test]
fn sha384() {
assert!(html::check_integrity(
"abcdef0123456789".as_bytes(),
"sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw"
));
}
#[test]
fn sha512() {
assert!(html::check_integrity(
"abcdef0123456789".as_bytes(),
"sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww=="
));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use monolith::html;
#[test]
fn empty_hash() {
assert!(!html::check_integrity("abcdef0123456789".as_bytes(), ""));
}
#[test]
fn empty_input_empty_hash() {
assert!(!html::check_integrity("".as_bytes(), ""));
}
#[test]
fn sha256() {
assert!(!html::check_integrity(
"abcdef0123456789".as_bytes(),
"sha256-badhash"
));
}
#[test]
fn sha384() {
assert!(!html::check_integrity(
"abcdef0123456789".as_bytes(),
"sha384-badhash"
));
}
#[test]
fn sha512() {
assert!(!html::check_integrity(
"abcdef0123456789".as_bytes(),
"sha512-badhash"
));
}
}

View File

@ -1,83 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::html;
use monolith::opts::Options;
#[test]
fn isolated() {
let mut options = Options::default();
options.isolate = true;
let csp_content = html::compose_csp(&options);
assert_eq!(
csp_content,
"default-src 'unsafe-eval' 'unsafe-inline' data:;"
);
}
#[test]
fn no_css() {
let mut options = Options::default();
options.no_css = true;
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "style-src 'none';");
}
#[test]
fn no_fonts() {
let mut options = Options::default();
options.no_fonts = true;
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "font-src 'none';");
}
#[test]
fn no_frames() {
let mut options = Options::default();
options.no_frames = true;
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "frame-src 'none'; child-src 'none';");
}
#[test]
fn no_js() {
let mut options = Options::default();
options.no_js = true;
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "script-src 'none';");
}
#[test]
fn no_images() {
let mut options = Options::default();
options.no_images = true;
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "img-src data:;");
}
#[test]
fn all() {
let mut options = Options::default();
options.isolate = true;
options.no_css = true;
options.no_fonts = true;
options.no_frames = true;
options.no_js = true;
options.no_images = true;
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
}
}

View File

@ -1,66 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use chrono::prelude::*;
use reqwest::Url;
use monolith::html;
#[test]
fn http_url() {
let url: Url = Url::parse("http://192.168.1.1/").unwrap();
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::create_metadata_tag(&url);
assert_eq!(
metadata_comment,
format!(
"<!-- Saved from {} at {} using {} v{} -->",
&url,
timestamp,
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
);
}
#[test]
fn file_url() {
let url: Url = Url::parse("file:///home/monolith/index.html").unwrap();
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::create_metadata_tag(&url);
assert_eq!(
metadata_comment,
format!(
"<!-- Saved from local source at {} using {} v{} -->",
timestamp,
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
);
}
#[test]
fn data_url() {
let url: Url = Url::parse("data:text/html,Hello%2C%20World!").unwrap();
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::create_metadata_tag(&url);
assert_eq!(
metadata_comment,
format!(
"<!-- Saved from local source at {} using {} v{} -->",
timestamp,
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
);
}
}

View File

@ -1,156 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap;
use monolith::html;
use monolith::opts::Options;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn small_medium_large() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small.png 1x, medium.png 1.5x, large.png 2x";
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
embedded_css,
format!(
"{} 1x, {} 1.5x, {} 2x",
EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL,
),
);
}
#[test]
fn small_medium_only_medium_has_scale() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small.png, medium.png 1.5x";
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
embedded_css,
format!("{}, {} 1.5x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
);
}
#[test]
fn commas_within_file_names() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small,s.png 1x, large,l.png 2x";
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
embedded_css,
format!("{} 1x, {} 2x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
);
}
#[test]
fn tabs_and_newlines_after_commas() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small,s.png 1x,\nmedium,m.png 2x,\nlarge,l.png 3x";
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
embedded_css,
format!(
"{} 1x, {} 2x, {} 3x",
EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
),
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap;
use monolith::html;
use monolith::opts::Options;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn trailing_comma() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small.png 1x, large.png 2x,";
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
embedded_css,
format!("{} 1x, {} 2x,", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
);
}
}

View File

@ -1,104 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::html;
#[test]
fn present() {
let html = "<!doctype html>
<html>
<head>
<base href=\"https://musicbrainz.org\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(
html::get_base_url(&dom.document),
Some("https://musicbrainz.org".to_string())
);
}
#[test]
fn multiple_tags() {
let html = "<!doctype html>
<html>
<head>
<base href=\"https://www.discogs.com/\" />
<base href=\"https://musicbrainz.org\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(
html::get_base_url(&dom.document),
Some("https://www.discogs.com/".to_string())
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use monolith::html;
#[test]
fn absent() {
let html = "<!doctype html>
<html>
<head>
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_base_url(&dom.document), None);
}
#[test]
fn no_href() {
let html = "<!doctype html>
<html>
<head>
<base />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_base_url(&dom.document), None);
}
#[test]
fn empty_href() {
let html = "<!doctype html>
<html>
<head>
<base href=\"\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_base_url(&dom.document), Some("".to_string()));
}
}

View File

@ -1,72 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::html;
#[test]
fn meta_content_type() {
let html = "<!doctype html>
<html>
<head>
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string()));
}
#[test]
fn meta_charset() {
let html = "<!doctype html>
<html>
<head>
<meta charset=\"GB2312\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string()));
}
#[test]
fn multiple_conflicting_meta_charset_first() {
let html = "<!doctype html>
<html>
<head>
<meta charset=\"utf-8\" />
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_charset(&dom.document), Some("utf-8".to_string()));
}
#[test]
fn multiple_conflicting_meta_content_type_first() {
let html = "<!doctype html>
<html>
<head>
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
<meta charset=\"utf-8\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string()));
}
}

View File

@ -1,54 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use monolith::html;
#[test]
fn div_two_style_attributes() {
let html = "<!doctype html><html><head></head><body><DIV STYLE=\"color: blue;\" style=\"display: none;\"></div></body></html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) {
*i += 1;
match &node.data {
NodeData::Document => {
// Dig deeper
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
NodeData::Element { ref name, .. } => {
let node_name = name.local.as_ref().to_string();
if node_name == "body" {
assert_eq!(html::get_node_attr(node, "class"), None);
} else if node_name == "div" {
assert_eq!(
html::get_node_attr(node, "style"),
Some("color: blue;".to_string())
);
}
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
_ => (),
};
}
test_walk(&dom.document, &mut count);
assert_eq!(count, 6);
}
}

View File

@ -1,53 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use monolith::html;
#[test]
fn parent_node_names() {
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) {
*i += 1;
match &node.data {
NodeData::Document => {
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
NodeData::Element { ref name, .. } => {
let node_name = name.local.as_ref().to_string();
let parent = html::get_parent_node(node);
let parent_node_name = html::get_node_name(&parent);
if node_name == "head" || node_name == "body" {
assert_eq!(parent_node_name, Some("html"));
} else if node_name == "div" {
assert_eq!(parent_node_name, Some("body"));
} else if node_name == "p" {
assert_eq!(parent_node_name, Some("div"));
}
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
_ => (),
};
}
test_walk(&dom.document, &mut count);
assert_eq!(count, 7);
}
}

View File

@ -1,50 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::html;
#[test]
fn icon() {
let html = "<link rel=\"icon\" href=\"\" /><div>text</div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let res: bool = html::has_favicon(&dom.document);
assert!(res);
}
#[test]
fn shortcut_icon() {
let html = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let res: bool = html::has_favicon(&dom.document);
assert!(res);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use monolith::html;
#[test]
fn absent() {
let html = "<div>text</div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let res: bool = html::has_favicon(&dom.document);
assert!(!res);
}
}

View File

@ -1,58 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::html;
#[test]
fn icon() {
assert!(html::is_icon("icon"));
}
#[test]
fn shortcut_icon_capitalized() {
assert!(html::is_icon("Shortcut Icon"));
}
#[test]
fn icon_uppercase() {
assert!(html::is_icon("ICON"));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use monolith::html;
#[test]
fn mask_icon() {
assert!(!html::is_icon("mask-icon"));
}
#[test]
fn fluid_icon() {
assert!(!html::is_icon("fluid-icon"));
}
#[test]
fn stylesheet() {
assert!(!html::is_icon("stylesheet"));
}
#[test]
fn empty_string() {
assert!(!html::is_icon(""));
}
}

View File

@ -1,14 +0,0 @@
mod add_favicon;
mod check_integrity;
mod compose_csp;
mod create_metadata_tag;
mod embed_srcset;
mod get_base_url;
mod get_charset;
mod get_node_attr;
mod get_node_name;
mod has_favicon;
mod is_icon;
mod serialize_document;
mod set_node_attr;
mod walk_and_embed_assets;

View File

@ -1,153 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::html;
use monolith::opts::Options;
#[test]
fn div_as_root_element() {
let html = "<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let options = Options::default();
assert_eq!(
String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)),
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
);
}
#[test]
fn full_page_with_no_html_head_or_body() {
let html = "<title>Isolated document</title>\
<link rel=\"something\" href=\"some.css\" />\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut options = Options::default();
options.isolate = true;
assert_eq!(
String::from_utf8_lossy(&html::serialize_document(
dom,
"".to_string(),
&options
)),
"<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:;\"></meta>\
<title>Isolated document</title>\
<link rel=\"something\" href=\"some.css\">\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
</head>\
<body>\
<div>\
<script src=\"some.js\"></script>\
</div>\
</body>\
</html>"
);
}
#[test]
fn doctype_and_the_rest_no_html_head_or_body() {
let html = "<!doctype html>\
<title>Unstyled document</title>\
<link rel=\"stylesheet\" href=\"main.css\"/>\
<div style=\"display: none;\"></div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut options = Options::default();
options.no_css = true;
assert_eq!(
String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
<title>Unstyled document</title>\
<link rel=\"stylesheet\" href=\"main.css\">\
</head>\
<body><div style=\"display: none;\"></div></body>\
</html>"
);
}
#[test]
fn doctype_and_the_rest_no_html_head_or_body_forbid_frames() {
let html = "<!doctype html>\
<title>Frameless document</title>\
<link rel=\"something\"/>\
<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut options = Options::default();
options.no_frames = true;
assert_eq!(
String::from_utf8_lossy(&html::serialize_document(
dom,
"".to_string(),
&options
)),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
<title>Frameless document</title>\
<link rel=\"something\">\
</head>\
<body><div><script src=\"some.js\"></script></div></body>\
</html>"
);
}
#[test]
fn doctype_and_the_rest_all_forbidden() {
let html = "<!doctype html>\
<title>no-frame no-css no-js no-image isolated document</title>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\
<div>\
<script src=\"some.js\"></script>\
<img style=\"width: 100%;\" src=\"some.png\" />\
<iframe src=\"some.html\"></iframe>\
</div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut options = Options::default();
options.isolate = true;
options.no_css = true;
options.no_fonts = true;
options.no_frames = true;
options.no_js = true;
options.no_images = true;
assert_eq!(
String::from_utf8_lossy(&html::serialize_document(
dom,
"".to_string(),
&options
)),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
<title>no-frame no-css no-js no-image isolated document</title>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\
</head>\
<body>\
<div>\
<script src=\"some.js\"></script>\
<img style=\"width: 100%;\" src=\"some.png\">\
<iframe src=\"some.html\"></iframe>\
</div>\
</body>\
</html>"
);
}
}

View File

@ -1,108 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use monolith::html;
#[test]
fn html_lang_and_body_style() {
let html = "<!doctype html><html lang=\"en\"><head></head><body></body></html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) {
*i += 1;
match &node.data {
NodeData::Document => {
// Dig deeper
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
NodeData::Element { ref name, .. } => {
let node_name = name.local.as_ref().to_string();
if node_name == "html" {
assert_eq!(html::get_node_attr(node, "lang"), Some("en".to_string()));
html::set_node_attr(node, "lang", Some("de".to_string()));
assert_eq!(html::get_node_attr(node, "lang"), Some("de".to_string()));
html::set_node_attr(node, "lang", None);
assert_eq!(html::get_node_attr(node, "lang"), None);
html::set_node_attr(node, "lang", Some("".to_string()));
assert_eq!(html::get_node_attr(node, "lang"), Some("".to_string()));
} else if node_name == "body" {
assert_eq!(html::get_node_attr(node, "style"), None);
html::set_node_attr(node, "style", Some("display: none;".to_string()));
assert_eq!(
html::get_node_attr(node, "style"),
Some("display: none;".to_string())
);
}
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
_ => (),
};
}
test_walk(&dom.document, &mut count);
assert_eq!(count, 5);
}
#[test]
fn body_background() {
let html = "<!doctype html><html lang=\"en\"><head></head><body background=\"1\" background=\"2\"></body></html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) {
*i += 1;
match &node.data {
NodeData::Document => {
// Dig deeper
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
NodeData::Element { ref name, .. } => {
let node_name = name.local.as_ref().to_string();
if node_name == "body" {
assert_eq!(
html::get_node_attr(node, "background"),
Some("1".to_string())
);
html::set_node_attr(node, "background", None);
assert_eq!(html::get_node_attr(node, "background"), None);
}
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
_ => (),
};
}
test_walk(&dom.document, &mut count);
assert_eq!(count, 5);
}
}

View File

@ -1,518 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
use reqwest::blocking::Client;
use std::collections::HashMap;
use url::Url;
use monolith::html;
use monolith::opts::Options;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn basic() {
let cache = &mut HashMap::new();
let html: &str = "<div><P></P></div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><p></p></div></body></html>"
);
}
#[test]
fn ensure_no_recursive_iframe() {
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
);
}
#[test]
fn ensure_no_recursive_frame() {
let html = "<frameset><frame src=\"\"></frameset>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
);
}
#[test]
fn no_css() {
let html = "\
<link rel=\"stylesheet\" href=\"main.css\">\
<link rel=\"alternate stylesheet\" href=\"main.css\">\
<style>html{background-color: #000;}</style>\
<div style=\"display: none;\"></div>\
";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<link rel=\"stylesheet\">\
<link rel=\"alternate stylesheet\">\
<style></style>\
</head>\
<body>\
<div></div>\
</body>\
</html>\
"
);
}
#[test]
fn no_images() {
let html = "<link rel=\"icon\" href=\"favicon.ico\">\
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
format!(
"<html>\
<head>\
<link rel=\"icon\">\
</head>\
<body>\
<div>\
<img src=\"{empty_image}\">\
</div>\
</body>\
</html>",
empty_image = EMPTY_IMAGE_DATA_URL
)
);
}
#[test]
fn no_body_background_images() {
let html =
"<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body></body></html>"
);
}
#[test]
fn no_frames() {
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_frames = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
</head>\
<frameset>\
<frame src=\"\">\
</frameset>\
</html>\
"
);
}
#[test]
fn no_iframes() {
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_frames = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head></head>\
<body>\
<iframe src=\"\"></iframe>\
</body>\
</html>\
"
);
}
#[test]
fn no_js() {
let html = "\
<div onClick=\"void(0)\">\
<script src=\"http://localhost/assets/some.js\"></script>\
<script>alert(1)</script>\
</div>\
";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_js = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head></head>\
<body>\
<div>\
<script></script>\
<script></script>\
</div>\
</body>\
</html>\
"
);
}
#[test]
fn keeps_integrity_for_unfamiliar_links() {
let html = "<title>Has integrity</title>\
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<title>Has integrity</title>\
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\">\
</head>\
<body></body>\
</html>\
"
);
}
#[test]
fn discards_integrity_for_known_links_nojs_nocss() {
let html = "\
<title>No integrity</title>\
<link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
<script integrity=\"\" src=\"some.js\"></script>\
";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.no_js = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<title>No integrity</title>\
<link rel=\"stylesheet\">\
<script></script>\
</head>\
<body></body>\
</html>\
"
);
}
#[test]
fn discards_integrity_for_embedded_assets() {
let html = "\
<title>No integrity</title>\
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\"/>\
<script integrity=\"sha384-456\" src=\"some.js\"></script>\
";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.no_js = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<title>No integrity</title>\
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\">\
<script></script>\
</head>\
<body>\
</body>\
</html>\
"
);
}
#[test]
fn removes_unwanted_meta_tags() {
let html = "\
<html>\
<head>\
<meta http-equiv=\"Refresh\" content=\"2\"/>\
<meta http-equiv=\"Location\" content=\"https://freebsd.org\"/>\
</head>\
<body>\
</body>\
</html>\
";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.no_frames = true;
options.no_js = true;
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<meta content=\"2\">\
<meta content=\"https://freebsd.org\">\
</head>\
<body>\
</body>\
</html>"
);
}
#[test]
fn processes_noscript_tags() {
let html = "\
<html>\
<body>\
<noscript>\
<img src=\"image.png\" />\
</noscript>\
</body>\
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
format!(
"\
<html>\
<head>\
</head>\
<body>\
<noscript>\
<img src=\"{}\">\
</noscript>\
</body>\
</html>",
EMPTY_IMAGE_DATA_URL,
)
);
}
#[test]
fn preserves_script_type_json() {
let html = "<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>\
</head>\
<body>\
</body>\
</html>"
);
}
}

View File

@ -1,53 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::js;
#[test]
fn onblur_camelcase() {
assert!(js::attr_is_event_handler("onBlur"));
}
#[test]
fn onclick_lowercase() {
assert!(js::attr_is_event_handler("onclick"));
}
#[test]
fn onclick_camelcase() {
assert!(js::attr_is_event_handler("onClick"));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use monolith::js;
#[test]
fn href() {
assert!(!js::attr_is_event_handler("href"));
}
#[test]
fn empty_string() {
assert!(!js::attr_is_event_handler(""));
}
#[test]
fn class() {
assert!(!js::attr_is_event_handler("class"));
}
}

View File

@ -1 +0,0 @@
mod attr_is_event_handler;

View File

@ -1,14 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
#[test]
fn contains_correct_image_data() {
assert_eq!(empty_image!(), "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII=");
}
}

View File

@ -1,2 +0,0 @@
mod empty_image;
mod str;

View File

@ -1,24 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
#[test]
fn returns_empty_string() {
assert_eq!(str!(), "");
}
#[test]
fn converts_integer_into_string() {
assert_eq!(str!(123), "123");
}
#[test]
fn converts_str_into_string() {
assert_eq!(str!("abc"), "abc");
}
}

View File

@ -1,8 +0,0 @@
mod cli;
mod css;
mod html;
mod js;
// mod macros;
mod opts;
mod url;
mod utils;

View File

@ -1,35 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::opts::Options;
#[test]
fn defaults() {
let options: Options = Options::default();
assert_eq!(options.no_audio, false);
assert_eq!(options.base_url, None);
assert_eq!(options.no_css, false);
assert_eq!(options.charset, None);
assert_eq!(options.no_frames, false);
assert_eq!(options.no_fonts, false);
assert_eq!(options.no_images, false);
assert_eq!(options.isolate, false);
assert_eq!(options.no_js, false);
assert_eq!(options.insecure, false);
assert_eq!(options.no_metadata, false);
assert_eq!(options.output, "".to_string());
assert_eq!(options.silent, false);
assert_eq!(options.timeout, 0);
assert_eq!(options.user_agent, None);
assert_eq!(options.no_video, false);
assert_eq!(options.target, "".to_string());
}
}

View File

@ -1,63 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use reqwest::Url;
use monolith::url;
#[test]
fn preserve_original() {
let u: Url = Url::parse("https://somewhere.com/font.eot#iefix").unwrap();
let clean_u: Url = url::clean_url(u.clone());
assert_eq!(clean_u.as_str(), "https://somewhere.com/font.eot");
assert_eq!(u.as_str(), "https://somewhere.com/font.eot#iefix");
}
#[test]
fn removes_fragment() {
assert_eq!(
url::clean_url(Url::parse("https://somewhere.com/font.eot#iefix").unwrap()).as_str(),
"https://somewhere.com/font.eot"
);
}
#[test]
fn removes_empty_fragment() {
assert_eq!(
url::clean_url(Url::parse("https://somewhere.com/font.eot#").unwrap()).as_str(),
"https://somewhere.com/font.eot"
);
}
#[test]
fn removes_empty_fragment_and_keeps_empty_query() {
assert_eq!(
url::clean_url(Url::parse("https://somewhere.com/font.eot?#").unwrap()).as_str(),
"https://somewhere.com/font.eot?"
);
}
#[test]
fn removesempty_fragment_and_keeps_empty_query() {
assert_eq!(
url::clean_url(Url::parse("https://somewhere.com/font.eot?a=b&#").unwrap()).as_str(),
"https://somewhere.com/font.eot?a=b&"
);
}
#[test]
fn keeps_credentials() {
assert_eq!(
url::clean_url(Url::parse("https://cookie:monster@gibson.internet/").unwrap()).as_str(),
"https://cookie:monster@gibson.internet/"
);
}
}

View File

@ -1,109 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use reqwest::Url;
use monolith::url;
#[test]
fn encode_string_with_specific_media_type() {
let media_type = "application/javascript";
let data = "var word = 'hello';\nalert(word);\n";
let data_url = url::create_data_url(
media_type,
"",
data.as_bytes(),
&Url::parse("data:,").unwrap(),
);
assert_eq!(
data_url.as_str(),
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
);
}
#[test]
fn encode_append_fragment() {
let data = "<svg></svg>\n";
let data_url = url::create_data_url(
"image/svg+xml",
"",
data.as_bytes(),
&Url::parse("data:,").unwrap(),
);
assert_eq!(
data_url.as_str(),
"data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K"
);
}
#[test]
fn encode_string_with_specific_media_type_and_charset() {
let media_type = "application/javascript";
let charset = "utf8";
let data = "var word = 'hello';\nalert(word);\n";
let data_url = url::create_data_url(
media_type,
charset,
data.as_bytes(),
&Url::parse("data:,").unwrap(),
);
assert_eq!(
data_url.as_str(),
"data:application/javascript;charset=utf8;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
);
}
#[test]
fn create_data_url_with_us_ascii_charset() {
let media_type = "";
let charset = "us-ascii";
let data = "";
let data_url = url::create_data_url(
media_type,
charset,
data.as_bytes(),
&Url::parse("data:,").unwrap(),
);
assert_eq!(data_url.as_str(), "data:;base64,");
}
#[test]
fn create_data_url_with_utf8_charset() {
let media_type = "";
let charset = "utf8";
let data = "";
let data_url = url::create_data_url(
media_type,
charset,
data.as_bytes(),
&Url::parse("data:,").unwrap(),
);
assert_eq!(data_url.as_str(), "data:;charset=utf8;base64,");
}
#[test]
fn create_data_url_with_media_type_text_plain_and_utf8_charset() {
let media_type = "text/plain";
let charset = "utf8";
let data = "";
let data_url = url::create_data_url(
media_type,
charset,
data.as_bytes(),
&Url::parse("data:,").unwrap(),
);
assert_eq!(data_url.as_str(), "data:text/plain;charset=utf8;base64,");
}
}

View File

@ -1,110 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::url;
#[test]
fn mailto() {
assert!(url::is_url_and_has_protocol(
"mailto:somebody@somewhere.com?subject=hello"
));
}
#[test]
fn tel() {
assert!(url::is_url_and_has_protocol("tel:5551234567"));
}
#[test]
fn ftp_no_slashes() {
assert!(url::is_url_and_has_protocol("ftp:some-ftp-server.com"));
}
#[test]
fn ftp_with_credentials() {
assert!(url::is_url_and_has_protocol(
"ftp://user:password@some-ftp-server.com"
));
}
#[test]
fn javascript() {
assert!(url::is_url_and_has_protocol("javascript:void(0)"));
}
#[test]
fn http() {
assert!(url::is_url_and_has_protocol("http://news.ycombinator.com"));
}
#[test]
fn https() {
assert!(url::is_url_and_has_protocol("https://github.com"));
}
#[test]
fn file() {
assert!(url::is_url_and_has_protocol("file:///tmp/image.png"));
}
#[test]
fn mailto_uppercase() {
assert!(url::is_url_and_has_protocol(
"MAILTO:somebody@somewhere.com?subject=hello"
));
}
#[test]
fn empty_data_url() {
assert!(url::is_url_and_has_protocol("data:text/html,"));
}
#[test]
fn empty_data_url_surrounded_by_spaces() {
assert!(url::is_url_and_has_protocol(" data:text/html, "));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use monolith::url;
#[test]
fn url_with_no_protocol() {
assert_eq!(
url::is_url_and_has_protocol("//some-hostname.com/some-file.html"),
false
);
}
#[test]
fn relative_path() {
assert_eq!(
url::is_url_and_has_protocol("some-hostname.com/some-file.html"),
false
);
}
#[test]
fn relative_to_root_path() {
assert_eq!(url::is_url_and_has_protocol("/some-file.html"), false);
}
#[test]
fn empty_string() {
assert_eq!(url::is_url_and_has_protocol(""), false);
}
}

View File

@ -1,5 +0,0 @@
mod clean_url;
mod create_data_url;
mod is_url_and_has_protocol;
mod parse_data_url;
mod resolve_url;

Some files were not shown because too many files have changed in this diff Show More