first commit

This commit is contained in:
vflyson 2017-02-20 02:51:24 -05:00
parent a6bf742933
commit bb3a6680a7
11 changed files with 367 additions and 1 deletions

View file

@ -1,2 +1,33 @@
# monolith
Save HTML pages with ease
A data hoarder's dream come true:
bundle any web page into a stand-alone HTML file.
Unlike conventional "Save page as …", `monolith` saves the target
document **and** embeds JavaScript, CSS and image assets **all at once**,
resulting in a single HTML5 document that is easy to store and share.
Works both on remote and local targets.
Unlike saving websites with `wget -mpk http://news.ycombinator.com`,
`monolith` embeds all assets as data-URIs, so the saved page displays
exactly the same at any time, regardless of Internet connectivity.
However, keep in mind that `monolith` is not aware of your browser's session.
### Installation
$ sudo npm install -g git@github.com:Y2Z/monolith.git
### Usage
$ monolith <local path>/index.html > mysite.html
or
$ monolith https://github.com > github.html
<!-- or -->
<!-- cat local.html | monolith - > local.html -->
### Options
- `-u`, `--data-uri`: output the result document as one big data-URI
- `-q`, `--quiet`: suppress verbose output
<!-- - `-a`: fix anchor href="" attributes for remote documents -->
### License
GPLv3

40
bin/index.js Executable file
View file

@ -0,0 +1,40 @@
#!/usr/bin/env nodejs
var compactor = require('../compactor.js');
var options = require('../options.js');
/**
 * Print a short usage hint to standard output.
 */
function printUsage () {
  var usageText = "\nUsage: \n monolith https://github.com\n"
  console.log(usageText)
}
// Command-line entry point.
// `--data-uri`/`-u` and `--quiet`/`-q` switch on the matching flags of the
// options object; any other argument is taken as the target document.
// Exactly one target is required, otherwise the usage hint is printed.
if (process.argv.length > 2) {
  var target = null
  for (var i = 2, ilen = process.argv.length; i < ilen; i++) {
    var argument = process.argv[i]
    if (argument === '--data-uri' || argument === '-u') {
      options.outputFinalResultAsBase64 = true
    } else if (argument === '--quiet' || argument === '-q') {
      options.suppressVerboseOutput = true
    } else if (!target) {
      target = argument
    } else {
      // Can't have more than one target
      target = null
      break
    }
  }
  if (target) {
    compactor(target, function (error, result) {
      if (error) {
        // Report the failure on stderr and through the exit status instead of
        // printing "undefined" on stdout as if it were the bundled page
        console.error('monolith:', error.message || error)
        process.exitCode = 1
        return
      }
      console.log(result)
    })
  } else {
    printUsage()
  }
} else {
  printUsage()
}

49
compactor.js Executable file
View file

@ -0,0 +1,49 @@
#!/usr/bin/env node
'use strict'
var path = require('path')
var jsdom = require('jsdom')
var options = require('./options.js');
var functions = require('./functions.js')
// Local shortcuts for the helpers provided by ./functions.js
var absoluteURLPath = functions.absoluteURLPath
var isURL = functions.isURL
var base64 = functions.base64
var resolve = functions.resolve
var retrieveFile = functions.retrieveFile
// Asset passes; each one exposes a `parser(window, absBasePath)` function
var cssModule = require('./modules/css.js')
var jsModule = require('./modules/js.js')
var imgModule = require('./modules/img.js')
var faviconModule = require('./modules/favicon.js')
var anchorsModule = require('./modules/anchors.js')

// The parsers are run over the document in exactly this order
var modules = [
  cssModule.parser,     // 1. CSS
  jsModule.parser,      // 2. JS
  imgModule.parser,     // 3. images
  faviconModule.parser, // 4. favicon
  anchorsModule.parser  // 5. anchors
]
/**
 * Load a document, run every asset parser over its DOM and hand the resulting
 * markup to the callback.
 *
 * @param {string} targetDocumentPath - http(s) URL or local path of the root document.
 * @param {function} callback - Called as `callback(error)` when the document
 *   cannot be turned into a DOM, otherwise as `callback(null, result)`. `result`
 *   is the serialized document, or its base64 encoding when
 *   `options.outputFinalResultAsBase64` is set.
 */
function monolith (targetDocumentPath, callback) {
  var targetIsURL = isURL(targetDocumentPath)
  // Make local targets absolute up front. Resolving the original relative path
  // against its own directory a second time would duplicate the directory part
  // ("site/index.html" -> "<cwd>/site/site/index.html").
  var rootDocumentPath = targetIsURL
    ? targetDocumentPath
    : path.resolve(targetDocumentPath)
  // Determine the absolute base path that relative asset references start from
  var absBasePath = targetIsURL
    ? absoluteURLPath(rootDocumentPath)
    : path.dirname(rootDocumentPath)
  absBasePath += '/' // Append trailing slash
  // Retrieve the root document to use as a base
  var rootFileContent = retrieveFile(absBasePath, rootDocumentPath)
  // Convert the target document into a DOM tree
  jsdom.env(rootFileContent, [], function (err, window) {
    // Without this guard `window` is not usable and the loop below would throw
    if (err) return callback(err)
    for (var i = 0, ilen = modules.length; i < ilen; i++)
      modules[i](window, absBasePath)
    // Serialize the whole document: documentElement.innerHTML leaves out the
    // doctype and the <html> element itself (including its attributes)
    var result = jsdom.serializeDocument(window.document)
    callback(null, options.outputFinalResultAsBase64 ? base64(result) : result)
  })
}
module.exports = monolith

67
functions.js Normal file
View file

@ -0,0 +1,67 @@
var fs = require('fs')
var path = require('path')
var url = require('url')
var request = require('sync-request')
var options = require('./options.js');
// In-memory store of already fetched remote files, keyed by
// "<absolute URL>@<format>"; read and filled by retrieveFile
var cache = {}
/**
 * Return the "directory" part of an absolute URL, without a trailing slash.
 * Query string and fragment are dropped, as is the last path segment.
 *
 * Examples:
 *   http://site.com/image/icons/home.png -> http://site.com/image/icons
 *   http://site.com/dir/?page=2          -> http://site.com/dir
 *   http://site.com                      -> http://site.com
 *
 * @param {string} aURL - Absolute http(s) URL.
 * @returns {string} Protocol, host (with port) and directory path.
 */
function absoluteURLPath (aURL) {
  var parsed = url.parse(aURL)
  // `pathname` excludes the query string, unlike `path`
  var pathname = parsed.pathname || '/'
  // Keep everything up to (not including) the last slash, so that relative
  // references resolve next to the document rather than underneath it
  var directory = pathname.slice(0, pathname.lastIndexOf('/'))
  return parsed.protocol + '//' + parsed.host + directory
}
// Matches a leading "http://" or "https://", in any letter case
// TODO file:///
var reURL = /^https?:\/\//i

/**
 * Tell whether a path is a remote address, i.e. starts with http:// or https://.
 *
 * @param {string} aPath - Path or URL to inspect.
 * @returns {boolean} true for http(s) URLs, false for anything else.
 */
function isURL (aPath) {
  var startsWithHttpScheme = reURL.exec(aPath) !== null
  return startsWithHttpScheme
}
/**
 * Encode a value as base64.
 *
 * @param {string|Buffer} aInput - Text (encoded as UTF-8) or raw bytes.
 * @returns {string} The base64 representation of the input.
 */
function base64 (aInput) {
  // Buffer.from replaces the deprecated `new Buffer(...)` constructor, which
  // allocates uninitialized memory when it is handed a number
  return Buffer.from(aInput).toString('base64')
}
/**
 * Resolve a reference found in a document against the document's base.
 *
 * @param {string} aFrom - Base: an http(s) URL or a local directory path.
 * @param {string} aTo - Reference to resolve (relative, root-relative,
 *   protocol-relative or absolute).
 * @returns {string} An absolute URL, or an absolute local path when neither
 *   argument is an http(s) URL.
 */
function resolve (aFrom, aTo) {
  // An absolute URL stands on its own whatever the base is. This must be
  // checked first: with a local base, path.resolve would otherwise turn
  // "http://cdn.com/x.js" into "<base>/http:/cdn.com/x.js".
  if (isURL(aTo)) { // (http://site.com, http://site.com/css/main.css)
    return aTo
  }
  if (!isURL(aFrom)) {
    return path.resolve(aFrom, aTo)
  }
  if (aTo[0] == '/') { // (http://site.com/article/1, /css/main.css)
    var base = url.parse(aFrom)
    if (aTo[1] == '/') { // (http://site.com/article/1, //images/1.png)
      return base.protocol + aTo
    }
    return url.resolve(base.protocol + '//' + base.host, aTo)
  }
  // (http://site.com, css/main.css)
  return url.resolve(aFrom, aTo)
}
/**
 * Read a file from disk or fetch it over HTTP(S), synchronously.
 *
 * @param {string} aAbsBasePath - Base the file path is resolved against.
 * @param {string} aFilePath - Reference to the file.
 * @param {boolean} [aBinary] - Truthy to get the content base64-encoded,
 *   otherwise it is returned as UTF-8 text.
 * @returns {string} The file content. For remote files an empty string is
 *   returned when the request fails. Local reads are not guarded: an error
 *   from fs.readFileSync propagates to the caller.
 */
function retrieveFile (aAbsBasePath, aFilePath, aBinary) {
  var fullFilePath = resolve(aAbsBasePath, aFilePath)
  var format = aBinary ? 'base64' : 'utf8'
  if (!isURL(fullFilePath)) {
    return fs.readFileSync(fullFilePath, format)
  }
  // Remote files are fetched once per format and then served from the cache
  var cacheKey = fullFilePath + '@' + format
  if (cacheKey in cache) {
    return cache[cacheKey]
  }
  // Announce the download before the blocking request starts, not after it
  if (!options.suppressVerboseOutput)
    console.warn('Retrieving file', fullFilePath, '...')
  try {
    var res = request('GET', fullFilePath)
    return cache[cacheKey] = res.getBody(format)
  } catch (httpError) {
    // Best effort: carry on without this file, but say so unless told to be quiet
    if (!options.suppressVerboseOutput)
      console.warn('Failed to retrieve', fullFilePath + ':', httpError.message)
    return ''
  }
}
// Helpers made available to the files that require this one
module.exports = { absoluteURLPath, isURL, base64, resolve, retrieveFile }

21
modules/anchors.js Normal file
View file

@ -0,0 +1,21 @@
'use strict';
var functions = require('../functions.js')
module.exports = {
parser: function (window, absBasePath) {
var anchors = window.document.getElementsByTagName('a')
for (var i = 0, ilen = anchors.length; i < ilen; i++) {
if (anchors[i].getAttribute('href')) {
var anchor = anchors[i]
var href = anchor.getAttribute('href').trim()
var absoluteURL = functions.resolve(absBasePath, href)
anchor.setAttribute('href', absoluteURL)
}
}
}
}

20
modules/css.js Normal file
View file

@ -0,0 +1,20 @@
'use strict';
var functions = require('../functions.js')
var retrieveFile = functions.retrieveFile
module.exports = {
parser: function (window, absBasePath) {
var links = window.document.head.getElementsByTagName('link')
for (var i = 0, ilen = links.length; i < ilen; i++) {
if (links[i].getAttribute('rel') == 'stylesheet') {
var data = retrieveFile(absBasePath, links[i].getAttribute('href').trim(), true)
links[i].setAttribute('href', "data:text/css;base64," + data)
}
}
}
}

24
modules/favicon.js Normal file
View file

@ -0,0 +1,24 @@
'use strict'
var functions = require('../functions.js')
var retrieveFile = functions.retrieveFile
var mime = require('./img.js').mime
//var reIcon = /^([a-z]+\s)?icon(\s[a-z]+)?$/i
var reIcon = /icon/i
module.exports = {
parser: function (window, absBasePath) {
var links = window.document.head.getElementsByTagName('link')
for (var i = 0, ilen = links.length; i < ilen; i++) {
if (reIcon.test(links[i].getAttribute('rel'))) {
var data = retrieveFile(absBasePath, links[i].getAttribute('href').trim(), true)
links[i].setAttribute('href', "data:" + mime(data) + "base64," + data)
}
}
}
}

57
modules/img.js Normal file
View file

@ -0,0 +1,57 @@
'use strict';
var functions = require('../functions.js')
var retrieveFile = functions.retrieveFile
// Known file signatures, as they appear at the start of the data.
// The payload is base64 text, so the signatures are base64 prefixes of the
// files' leading bytes; the raw "<?xml" / "<svg" forms are kept for callers
// that pass unencoded markup.
var mimeSignatures = [
  ['iVBORw0K', 'image/png'],   // \x89PNG\r\n
  ['R0lGOD', 'image/gif'],     // GIF87a and GIF89a
  ['PD94bW', 'image/svg+xml'], // <?xml
  ['PHN2Z', 'image/svg+xml'],  // <svg
  ['<?xml', 'image/svg+xml'],
  ['<svg', 'image/svg+xml'],
  ['AAABA', 'image/x-icon']    // 00 00 01 00 (ICO header)
]

/**
 * Guess the media type of an image from the beginning of its content.
 *
 * @param {string} data - Image content, base64-encoded.
 * @returns {string} The detected media type; 'image/jpeg' when no known
 *   signature matches.
 */
function mime (data) {
  for (var i = 0, ilen = mimeSignatures.length; i < ilen; i++) {
    // Only the start of the data counts: the same characters may occur by
    // chance anywhere inside an unrelated payload
    if (data.startsWith(mimeSignatures[i][0]))
      return mimeSignatures[i][1]
  }
  return 'image/jpeg'
}
module.exports = {
mime: mime,
parser: function (window, absBasePath) {
// <img>, <picture> <img>
var imgs = window.document.getElementsByTagName('img')
for (var i = 0, ilen = imgs.length; i < ilen; i++) {
var img = imgs[i]
if (img.getAttribute('src')) {
var data = retrieveFile(absBasePath, img.getAttribute('src').trim(), true)
img.setAttribute('src', "data:" + mime(data) + ";base64," + data)
}
}
// <picture> <source>
var pictures = window.document.getElementsByTagName('picture')
for (var i = 0, ilen = pictures.length; i < ilen; i++) {
var picture = pictures[i]
var sources = picture.getElementsByTagName('source')
for (var s = 0, slen = sources.length; s < slen; s++) {
var source = sources[s]
if (source.getAttribute('srcset')) {
var data = retrieveFile(absBasePath, source.getAttribute('srcset').trim(), true)
var type = source.getAttribute('type')
source.setAttribute('srcset', "data:" + (type || mime(data)) + ";base64," + data)
}
}
}
}
}

27
modules/js.js Normal file
View file

@ -0,0 +1,27 @@
'use strict';
var functions = require('../functions.js')
var retrieveFile = functions.retrieveFile
var dataURI = true // set to true to convert the src attribute to a dataURI link
module.exports = {
parser: function (window, absBasePath) {
var scripts = window.document.getElementsByTagName('script')
for (var i = 0, ilen = scripts.length; i < ilen; i++) {
if (scripts[i].getAttribute('src')) {
var data = retrieveFile(absBasePath, scripts[i].getAttribute('src').trim(), dataURI)
if (dataURI) {
scripts[i].setAttribute('src', "data:text/javascript;base64," + data)
} else {
scripts[i].removeAttribute('src')
scripts[i].innerHTML = data
}
}
}
}
}

6
options.js Normal file
View file

@ -0,0 +1,6 @@
// Run-time settings; both switches start out disabled
var options = {}
// When true, the final document is emitted base64-encoded
options.outputFinalResultAsBase64 = false
// When true, progress messages are not printed
options.suppressVerboseOutput = false
// Export the settings object itself (not a copy), so that flags set on it by
// one file are visible to the others that require it
module.exports = options

24
package.json Normal file
View file

@ -0,0 +1,24 @@
{
"name": "monolith",
"version": "0.4.5",
"description": "Save HTML pages with ease",
"main": "index.js",
"dependencies": {
"sync-request": "^3.0.1",
"jsdom": "^9.9.1"
},
"bin": {
"monolith": "bin/index.js"
},
"scripts": {
"test": "bin/index.js https://github.com > github.html"
},
"keywords": [
"html5",
"monolith",
"one-for-all",
"all-for-one"
],
"author": "Y2Z",
"license": "GPL-3.0"
}