automatically obtain favicon.ico

This commit is contained in:
Sunshine 2020-07-14 02:58:29 -04:00
parent 164e728ad3
commit 09d41d2cf1
No known key found for this signature in database
GPG key ID: B80CA68703CD8AB1
6 changed files with 209 additions and 25 deletions

View file

@ -27,13 +27,38 @@ struct SrcSetItem<'a> {
descriptor: &'a str, descriptor: &'a str,
} }
const ICON_VALUES: &[&str] = &[ const ICON_VALUES: &[&str] = &["icon", "shortcut icon"];
"icon",
"shortcut icon", pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
"mask-icon", let mut buf: Vec<u8> = Vec::new();
"apple-touch-icon", serialize(&mut buf, document, SerializeOpts::default())
"fluid-icon", .expect("unable to serialize DOM into buffer");
]; let result = String::from_utf8(buf).unwrap();
let mut dom = html_to_dom(&result);
let doc = dom.get_document();
let html = get_child_node_by_name(&doc, "html");
let head = get_child_node_by_name(&html, "head");
let favicon_node = dom.create_element(
QualName::new(None, ns!(), local_name!("link")),
vec![
Attribute {
name: QualName::new(None, ns!(), local_name!("rel")),
value: format_tendril!("icon"),
},
Attribute {
name: QualName::new(None, ns!(), local_name!("href")),
value: format_tendril!("{}", favicon_data_url),
},
],
Default::default(),
);
// Append favicon node to HEAD
head.children.borrow_mut().push(favicon_node.clone());
dom
}
pub fn get_parent_node(node: &Handle) -> Handle { pub fn get_parent_node(node: &Handle) -> Handle {
let parent = node.parent.take().clone(); let parent = node.parent.take().clone();
@ -140,6 +165,56 @@ pub fn embed_srcset(
result result
} }
/// Reports whether the DOM subtree rooted at `handle` links a favicon.
///
/// A node counts as a favicon when it is a `link` element carrying a `rel`
/// attribute whose trimmed value is accepted by `is_icon`. Document and
/// element nodes are searched recursively through their children; any other
/// kind of node yields `false`.
pub fn has_favicon(handle: &Handle) -> bool {
    match handle.data {
        NodeData::Document => handle.children.borrow().iter().any(has_favicon),
        NodeData::Element {
            ref name,
            ref attrs,
            ..
        } => {
            // Attributes are only inspected, never modified, so a shared borrow
            // suffices and cannot conflict with other outstanding readers
            let is_favicon_link = &name.local == "link"
                && attrs
                    .borrow()
                    .iter()
                    .any(|attr| &attr.name.local == "rel" && is_icon(attr.value.trim()));

            // Descend into children only when this element is not itself a favicon link
            is_favicon_link || handle.children.borrow().iter().any(has_favicon)
        }
        _ => false,
    }
}
pub fn walk_and_embed_assets( pub fn walk_and_embed_assets(
cache: &mut HashMap<String, Vec<u8>>, cache: &mut HashMap<String, Vec<u8>>,
client: &Client, client: &Client,
@ -1061,7 +1136,7 @@ pub fn walk_and_embed_assets(
} }
} }
pub fn html_to_dom(data: &str) -> html5ever::rcdom::RcDom { pub fn html_to_dom(data: &str) -> RcDom {
parse_document(RcDom::default(), Default::default()) parse_document(RcDom::default(), Default::default())
.from_utf8() .from_utf8()
.read_from(&mut data.as_bytes()) .read_from(&mut data.as_bytes())
@ -1087,7 +1162,9 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
let mut result = String::from_utf8(buf).unwrap(); let mut result = String::from_utf8(buf).unwrap();
// Take care of CSP // We can't make it isolate the page right away since it may have no HEAD element,
// ergo we have to serialize, parse the DOM again, insert the CSP meta tag, and then
// finally serialize and return the resulting string
if options.isolate if options.isolate
|| options.no_css || options.no_css
|| options.no_fonts || options.no_fonts
@ -1095,6 +1172,7 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
|| options.no_js || options.no_js
|| options.no_images || options.no_images
{ {
// Take care of CSP
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
let mut dom = html_to_dom(&result); let mut dom = html_to_dom(&result);
let doc = dom.get_document(); let doc = dom.get_document();
@ -1123,9 +1201,6 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
head.children.borrow_mut().push(meta.clone()); head.children.borrow_mut().push(meta.clone());
head.children.borrow_mut().reverse(); head.children.borrow_mut().reverse();
// Note: we can't make it isolate the page right away since it may have no HEAD element,
// ergo we have to serialize, parse the DOM again, insert the CSP meta tag, and then
// finally serialize the result
serialize(&mut buf, &doc, SerializeOpts::default()) serialize(&mut buf, &doc, SerializeOpts::default())
.expect("unable to serialize DOM into buffer"); .expect("unable to serialize DOM into buffer");
result = String::from_utf8(buf).unwrap(); result = String::from_utf8(buf).unwrap();

View file

@ -8,9 +8,13 @@ use std::path::Path;
use std::process; use std::process;
use std::time::Duration; use std::time::Duration;
use monolith::html::{html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets}; use monolith::html::{
add_favicon, has_favicon, html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets,
};
use monolith::opts::Options; use monolith::opts::Options;
use monolith::url::{data_url_to_data, is_data_url, is_file_url, is_http_url}; use monolith::url::{
data_to_data_url, data_url_to_data, is_data_url, is_file_url, is_http_url, resolve_url,
};
use monolith::utils::retrieve_asset; use monolith::utils::retrieve_asset;
mod macros; mod macros;
@ -48,7 +52,7 @@ fn main() {
let original_target: &str = &options.target; let original_target: &str = &options.target;
let target_url: &str; let target_url: &str;
let base_url; let base_url;
let dom; let mut dom;
// Pre-process the input // Pre-process the input
let cwd_normalized: String = let cwd_normalized: String =
@ -137,6 +141,28 @@ fn main() {
process::exit(1); process::exit(1);
} }
// Request and embed /favicon.ico (unless it's already linked in the document)
if !options.no_images && is_http_url(target_url) && !has_favicon(&dom.document) {
let favicon_ico_url: String = resolve_url(&base_url, "/favicon.ico").unwrap();
match retrieve_asset(
&mut cache,
&client,
&base_url,
&favicon_ico_url,
options.silent,
0,
) {
Ok((data, final_url, media_type)) => {
let favicon_data_url: String = data_to_data_url(&media_type, &data, &final_url);
dom = add_favicon(&dom.document, favicon_data_url);
}
Err(_) => {
// Failed to retrieve favicon.ico
}
}
}
// Embed remote assets // Embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0); walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);

View file

@ -0,0 +1,29 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use html5ever::serialize::{serialize, SerializeOpts};

    use crate::html;

    /// Adding a favicon to a document that has no explicit HEAD must yield a
    /// serialized page whose HEAD contains the new `link rel="icon"` element.
    #[test]
    fn basic() {
        let source_dom = html::html_to_dom("<div>text</div>");
        let dom = html::add_favicon(
            &source_dom.document,
            String::from("I_AM_A_FAVICON_DATA_URL"),
        );

        let mut buf: Vec<u8> = Vec::new();
        serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
        let rendered: String = buf.iter().map(|&byte| byte as char).collect();

        assert_eq!(
            rendered,
            "<html><head><link rel=\"icon\" href=\"I_AM_A_FAVICON_DATA_URL\"></link></head><body><div>text</div></body></html>"
        );
    }
}

View file

@ -0,0 +1,52 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::html;

    /// A `link` element with `rel="icon"` is detected as a favicon.
    #[test]
    fn icon() {
        let html = "<link rel=\"icon\" href=\"\" /><div>text</div>";
        let dom = html::html_to_dom(html);

        assert!(html::has_favicon(&dom.document));
    }

    /// A `link` element with `rel="shortcut icon"` is detected as a favicon.
    #[test]
    fn shortcut_icon() {
        let html = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>";
        let dom = html::html_to_dom(html);

        assert!(html::has_favicon(&dom.document));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::html;

    /// A document without any icon `link` element reports no favicon.
    #[test]
    fn absent() {
        let html = "<div>text</div>";
        let dom = html::html_to_dom(html);

        assert!(!html::has_favicon(&dom.document));
    }
}

View file

@ -23,16 +23,6 @@ mod passing {
fn icon_uppercase() { fn icon_uppercase() {
assert!(html::is_icon("ICON")); assert!(html::is_icon("ICON"));
} }
#[test]
fn mask_icon() {
assert!(html::is_icon("mask-icon"));
}
#[test]
fn fluid_icon() {
assert!(html::is_icon("fluid-icon"));
}
} }
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@ -46,6 +36,16 @@ mod passing {
mod failing { mod failing {
use crate::html; use crate::html;
#[test]
fn mask_icon() {
assert!(!html::is_icon("mask-icon"));
}
#[test]
fn fluid_icon() {
assert!(!html::is_icon("fluid-icon"));
}
#[test] #[test]
fn stylesheet() { fn stylesheet() {
assert!(!html::is_icon("stylesheet")); assert!(!html::is_icon("stylesheet"));

View file

@ -1,6 +1,8 @@
mod add_favicon;
mod csp; mod csp;
mod embed_srcset; mod embed_srcset;
mod get_node_name; mod get_node_name;
mod has_favicon;
mod has_proper_integrity; mod has_proper_integrity;
mod is_icon; mod is_icon;
mod metadata_tag; mod metadata_tag;