first commit
This commit is contained in:
parent
a6bf742933
commit
bb3a6680a7
11 changed files with 367 additions and 1 deletions
33
README.md
33
README.md
|
@ -1,2 +1,33 @@
|
|||
# monolith
|
||||
Save HTML pages with ease
|
||||
A data hoarder's dream come true:
|
||||
bundle any web page into a stand-alone HTML file.
|
||||
|
||||
Unlike conventional "Save page as …", `monolith` saves the target
|
||||
document **and** embeds JavaScript, CSS and image assets **all at once**,
|
||||
resulting in a single HTML5 document that is easy to store and share.
|
||||
|
||||
Works on both remote and local targets.
|
||||
|
||||
Unlike saving websites with `wget -mpk http://news.ycombinator.com`,
|
||||
`monolith` embeds all assets as data-URIs and therefore would display the page
|
||||
exactly the same at any time, regardless of Internet connectivity.
|
||||
|
||||
However, keep in mind that `monolith` is not aware of your browser's session.
|
||||
|
||||
### Installation
|
||||
$ sudo npm install -g git@github.com:Y2Z/monolith.git
|
||||
|
||||
### Usage
|
||||
$ monolith <local path>/index.html > mysite.html
|
||||
or
|
||||
$ monolith https://github.com > github.html
|
||||
<!-- or -->
|
||||
<!-- cat local.html | monolith - > local.html -->
|
||||
|
||||
### Options
|
||||
- `-u`, `--data-uri`: output the result document as one big data-URI
|
||||
- `-q`, `--quiet`: don't be verbose
|
||||
<!-- - `-a`: fix anchor href="" attributes for remote documents -->
|
||||
|
||||
### License
|
||||
GPLv3
|
||||
|
|
40
bin/index.js
Executable file
40
bin/index.js
Executable file
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/env nodejs
|
||||
|
||||
var compactor = require('../compactor.js');
|
||||
var options = require('../options.js');
|
||||
|
||||
/**
 * Writes a short usage hint for the command-line tool to standard output.
 */
function printUsage () {
    var usageText = "\nUsage: \n monolith https://github.com\n"

    console.log(usageText)
}
|
||||
|
||||
// Command-line entry point: read the flags, pick the single target and run
// the compactor on it. Anything that is not a known flag is taken as the
// target; a second target makes the invocation invalid.
if (process.argv.length > 2) {
    var target = null

    for (var i = 2, ilen = process.argv.length; i < ilen; i++) {
        var argument = process.argv[i]

        if (argument === '--data-uri' || argument === '-u') {
            options.outputFinalResultAsBase64 = true
        } else if (argument === '--quiet' || argument === '-q') {
            options.suppressVerboseOutput = true
        } else if (!target) {
            target = argument
        } else {
            // Can't have more than one target
            target = null
            break
        }
    }

    if (target) {
        compactor(target, function (error, result) {
            if (error) {
                // Report on stderr so that the message never ends up inside a
                // redirected output file, and signal the failure to the shell
                console.error(error.message || error)
                process.exitCode = 1
                return
            }

            console.log(result)
        })
    } else {
        printUsage()
    }
} else {
    printUsage()
}
|
49
compactor.js
Executable file
49
compactor.js
Executable file
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
'use strict'
|
||||
|
||||
var path = require('path')
|
||||
var jsdom = require('jsdom')
|
||||
|
||||
var options = require('./options.js');
|
||||
var functions = require('./functions.js')
|
||||
var absoluteURLPath = functions.absoluteURLPath,
|
||||
isURL = functions.isURL,
|
||||
base64 = functions.base64,
|
||||
resolve = functions.resolve,
|
||||
retrieveFile = functions.retrieveFile
|
||||
|
||||
var modules = [
|
||||
// 1. CSS
|
||||
require('./modules/css.js').parser,
|
||||
// 2. JS
|
||||
require('./modules/js.js').parser,
|
||||
// 3. images
|
||||
require('./modules/img.js').parser,
|
||||
// 4. favicon
|
||||
require('./modules/favicon.js').parser,
|
||||
// 5. anchors
|
||||
require('./modules/anchors.js').parser,
|
||||
]
|
||||
|
||||
/**
 * Bundles a document and the assets it references into a single HTML string.
 *
 * The root document is loaded, parsed into a DOM tree, and every entry of
 * `modules` (CSS, JS, images, favicon, anchors) is run over that tree.
 *
 * @param {string} targetDocumentPath - http(s) URL or local path of the page
 * @param {function} callback - called as callback(error) when the DOM could
 *   not be built, otherwise as callback(null, result); result is base64 text
 *   when options.outputFinalResultAsBase64 is set, plain markup otherwise
 */
function monolith (targetDocumentPath, callback) {
    var isRemote = isURL(targetDocumentPath)

    // Local targets are made absolute first: resolving a relative path such as
    // "site/index.html" against its own directory would duplicate the "site/" part
    var rootDocumentPath = isRemote
        ? targetDocumentPath
        : path.resolve(targetDocumentPath)

    // Determine the absolute initial document path
    var absBasePath = isRemote
        ? absoluteURLPath(targetDocumentPath)
        : path.dirname(rootDocumentPath)
    absBasePath += '/' // Append trailing slash

    // Retrieve the root document to use as a base
    var rootFileContent = retrieveFile(absBasePath, rootDocumentPath)

    // Convert the target document into a DOM tree
    jsdom.env(rootFileContent, [], function (err, window) {
        if (err) {
            // No window was produced, so there is nothing to process
            return callback(err)
        }

        for (var i = 0, ilen = modules.length; i < ilen; i++) {
            modules[i](window, absBasePath)
        }

        // NOTE(review): innerHTML of the root element leaves out the doctype
        // and the <html> tag itself — confirm this is the intended output
        var result = window.document.documentElement.innerHTML

        callback(null, options.outputFinalResultAsBase64 ? base64(result) : result)
    })
}
|
||||
|
||||
module.exports = monolith
|
67
functions.js
Normal file
67
functions.js
Normal file
|
@ -0,0 +1,67 @@
|
|||
var fs = require('fs')
|
||||
var path = require('path')
|
||||
var url = require('url')
|
||||
var request = require('sync-request')
|
||||
|
||||
var options = require('./options.js');
|
||||
|
||||
var cache = {}
|
||||
|
||||
/**
 * Returns the "directory" of an absolute URL, without a trailing slash.
 *
 * Example: http://site.com/image/icons/home.png -> http://site.com/image/icons
 *
 * The last path segment (the document itself), the query string and the
 * fragment are dropped, so that relative references can be resolved against
 * the result once the caller has appended a slash.
 *
 * @param {string} aURL - absolute URL
 * @returns {string} protocol, host and directory part of the path
 */
function absoluteURLPath (aURL) {
    var parsed = url.parse(aURL)
    var pathname = parsed.pathname || ''

    // Keep everything up to (not including) the last slash:
    // "/image/icons/home.png" -> "/image/icons", "/" -> ""
    var directory = pathname.substring(0, pathname.lastIndexOf('/'))

    return parsed.protocol + '//' + parsed.host + directory
}
|
||||
|
||||
// Matches text that starts with an http:// or https:// scheme, in any letter case
var reURL = /^https?:\/\//i // TODO file:///

/**
 * Tells whether the given path is an absolute HTTP(S) URL.
 *
 * @param {string} aPath - URL or file-system path
 * @returns {boolean} true when aPath starts with http:// or https://
 */
function isURL (aPath) {
    var startsWithHttpScheme = reURL.test(aPath)

    return startsWithHttpScheme
}
|
||||
/**
 * Encodes text as base64.
 *
 * @param {string|Buffer} aInput - content to encode
 * @returns {string} base64 representation of aInput
 */
function base64 (aInput) {
    // Buffer.from replaces the deprecated `new Buffer(...)` constructor, which
    // allocates a buffer of that size instead of failing when given a number
    return Buffer.from(aInput).toString('base64')
}
|
||||
|
||||
/**
 * Resolves a reference found in a document against the document's base.
 *
 * @param {string} aFrom - base: an http(s) URL or a local directory path
 * @param {string} aTo - reference to resolve (absolute URL, root-relative,
 *   protocol-relative or relative path)
 * @returns {string} absolute URL, or absolute file-system path when both the
 *   base and the reference are local
 */
function resolve (aFrom, aTo) {
    // An absolute URL does not depend on the base at all; this also keeps
    // remote references intact when the base is a local directory
    if (isURL(aTo)) { // (http://site.com, http://site.com/css/main.css)
        return aTo
    }

    if (!isURL(aFrom)) {
        return path.resolve(aFrom, aTo)
    }

    var base = url.parse(aFrom)

    if (aTo[0] === '/') { // (http://site.com/article/1, /css/main.css)
        if (aTo[1] === '/') { // (http://site.com/article/1, //images/1.png)
            return base.protocol + aTo
        }

        return url.resolve(base.protocol + '//' + base.host, aTo)
    }

    // (http://site.com, css/main.css)
    return url.resolve(aFrom, aTo)
}
|
||||
|
||||
/**
 * Loads a file referenced by a document, either over HTTP(S) or from disk.
 *
 * Remote files are fetched with a synchronous GET request and kept in the
 * module-level cache, keyed by resolved URL and output format. Local files
 * are read from disk on every call.
 *
 * @param {string} aAbsBasePath - base URL or directory of the document
 * @param {string} aFilePath - reference to the file, resolved against the base
 * @param {boolean} [aBinary] - when truthy the content is returned as base64,
 *   otherwise as UTF-8 text
 * @returns {string} the file content; an empty string when a remote file
 *   could not be retrieved
 */
function retrieveFile (aAbsBasePath, aFilePath, aBinary) {
    var fullFilePath = resolve(aAbsBasePath, aFilePath)
    var format = aBinary ? 'base64' : 'utf8'

    if (!isURL(fullFilePath)) {
        return fs.readFileSync(fullFilePath, format)
    }

    var cacheKey = fullFilePath + '@' + format

    if (cacheKey in cache) {
        return cache[cacheKey]
    }

    // Announce the download before the blocking request starts, not after it
    if (!options.suppressVerboseOutput) {
        console.warn('Retrieving file', fullFilePath, '...')
    }

    try {
        var res = request('GET', fullFilePath)

        return cache[cacheKey] = res.getBody(format)
    } catch (httpError) {
        // Best effort: one missing asset must not abort the whole page, but
        // the failure should be visible unless output is suppressed
        if (!options.suppressVerboseOutput) {
            console.warn('Failed to retrieve', fullFilePath + ':', httpError.message)
        }

        return ''
    }
}
|
||||
|
||||
module.exports = { absoluteURLPath, isURL, base64, resolve, retrieveFile }
|
21
modules/anchors.js
Normal file
21
modules/anchors.js
Normal file
|
@ -0,0 +1,21 @@
|
|||
'use strict';
|
||||
|
||||
var functions = require('../functions.js')
|
||||
|
||||
module.exports = {
|
||||
|
||||
parser: function (window, absBasePath) {
|
||||
var anchors = window.document.getElementsByTagName('a')
|
||||
|
||||
for (var i = 0, ilen = anchors.length; i < ilen; i++) {
|
||||
if (anchors[i].getAttribute('href')) {
|
||||
var anchor = anchors[i]
|
||||
var href = anchor.getAttribute('href').trim()
|
||||
var absoluteURL = functions.resolve(absBasePath, href)
|
||||
|
||||
anchor.setAttribute('href', absoluteURL)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
20
modules/css.js
Normal file
20
modules/css.js
Normal file
|
@ -0,0 +1,20 @@
|
|||
'use strict';
|
||||
|
||||
var functions = require('../functions.js')
|
||||
var retrieveFile = functions.retrieveFile
|
||||
|
||||
module.exports = {
|
||||
|
||||
parser: function (window, absBasePath) {
|
||||
var links = window.document.head.getElementsByTagName('link')
|
||||
|
||||
for (var i = 0, ilen = links.length; i < ilen; i++) {
|
||||
if (links[i].getAttribute('rel') == 'stylesheet') {
|
||||
var data = retrieveFile(absBasePath, links[i].getAttribute('href').trim(), true)
|
||||
|
||||
links[i].setAttribute('href', "data:text/css;base64," + data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
24
modules/favicon.js
Normal file
24
modules/favicon.js
Normal file
|
@ -0,0 +1,24 @@
|
|||
'use strict'
|
||||
|
||||
var functions = require('../functions.js')
|
||||
var retrieveFile = functions.retrieveFile
|
||||
var mime = require('./img.js').mime
|
||||
|
||||
//var reIcon = /^([a-z]+\s)?icon(\s[a-z]+)?$/i
|
||||
var reIcon = /icon/i
|
||||
|
||||
module.exports = {
|
||||
|
||||
parser: function (window, absBasePath) {
|
||||
var links = window.document.head.getElementsByTagName('link')
|
||||
|
||||
for (var i = 0, ilen = links.length; i < ilen; i++) {
|
||||
if (reIcon.test(links[i].getAttribute('rel'))) {
|
||||
var data = retrieveFile(absBasePath, links[i].getAttribute('href').trim(), true)
|
||||
|
||||
links[i].setAttribute('href', "data:" + mime(data) + "base64," + data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
57
modules/img.js
Normal file
57
modules/img.js
Normal file
|
@ -0,0 +1,57 @@
|
|||
'use strict';
|
||||
|
||||
var functions = require('../functions.js')
|
||||
var retrieveFile = functions.retrieveFile
|
||||
|
||||
/**
 * Guesses the media type of an image from the beginning of its content.
 *
 * The content is expected to be base64 text (that is how the callers in this
 * file retrieve images); raw SVG markup starting with "<?xml" is recognized
 * as well.
 *
 * @param {string} data - image content
 * @returns {string} media type; 'image/jpeg' when nothing else matches
 */
function mime (data) {
    // [prefix of the content, media type]; the base64 prefixes are the
    // encoded form of each format's leading bytes
    var signatures = [
        ['iVBORw0K', 'image/png'],     // PNG signature
        ['R0lGOD', 'image/gif'],       // "GIF87a" and "GIF89a"
        ['PD94bW', 'image/svg+xml'],   // "<?xml"
        ['PHN2Z', 'image/svg+xml'],    // "<svg"
        ['AAABA', 'image/x-icon'],     // ICO header (00 00 01 00)
        ['<?xml', 'image/svg+xml']     // markup that was not base64-encoded
    ]

    for (var i = 0, ilen = signatures.length; i < ilen; i++) {
        // Only the start counts: the same characters may show up by chance
        // further inside the payload of another format
        if (data.indexOf(signatures[i][0]) === 0) {
            return signatures[i][1]
        }
    }

    return 'image/jpeg'
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
mime: mime,
|
||||
|
||||
parser: function (window, absBasePath) {
|
||||
// <img>, <picture> <img>
|
||||
var imgs = window.document.getElementsByTagName('img')
|
||||
|
||||
for (var i = 0, ilen = imgs.length; i < ilen; i++) {
|
||||
var img = imgs[i]
|
||||
|
||||
if (img.getAttribute('src')) {
|
||||
var data = retrieveFile(absBasePath, img.getAttribute('src').trim(), true)
|
||||
|
||||
img.setAttribute('src', "data:" + mime(data) + ";base64," + data)
|
||||
}
|
||||
}
|
||||
|
||||
// <picture> <source>
|
||||
var pictures = window.document.getElementsByTagName('picture')
|
||||
|
||||
for (var i = 0, ilen = pictures.length; i < ilen; i++) {
|
||||
var picture = pictures[i]
|
||||
var sources = picture.getElementsByTagName('source')
|
||||
|
||||
for (var s = 0, slen = sources.length; s < slen; s++) {
|
||||
var source = sources[s]
|
||||
|
||||
if (source.getAttribute('srcset')) {
|
||||
var data = retrieveFile(absBasePath, source.getAttribute('srcset').trim(), true)
|
||||
var type = source.getAttribute('type')
|
||||
|
||||
source.setAttribute('srcset', "data:" + (type || mime(data)) + ";base64," + data)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
27
modules/js.js
Normal file
27
modules/js.js
Normal file
|
@ -0,0 +1,27 @@
|
|||
'use strict';
|
||||
|
||||
var functions = require('../functions.js')
|
||||
var retrieveFile = functions.retrieveFile
|
||||
|
||||
var dataURI = true // set to true to convert the src attribute to a dataURI link
|
||||
|
||||
module.exports = {
|
||||
|
||||
parser: function (window, absBasePath) {
|
||||
var scripts = window.document.getElementsByTagName('script')
|
||||
|
||||
for (var i = 0, ilen = scripts.length; i < ilen; i++) {
|
||||
if (scripts[i].getAttribute('src')) {
|
||||
var data = retrieveFile(absBasePath, scripts[i].getAttribute('src').trim(), dataURI)
|
||||
|
||||
if (dataURI) {
|
||||
scripts[i].setAttribute('src', "data:text/javascript;base64," + data)
|
||||
} else {
|
||||
scripts[i].removeAttribute('src')
|
||||
scripts[i].innerHTML = data
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
6
options.js
Normal file
6
options.js
Normal file
|
@ -0,0 +1,6 @@
|
|||
var options = {
|
||||
outputFinalResultAsBase64: false,
|
||||
suppressVerboseOutput: false
|
||||
}
|
||||
|
||||
module.exports = options
|
24
package.json
Normal file
24
package.json
Normal file
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"name": "monolith",
|
||||
"version": "0.4.5",
|
||||
"description": "Save HTML pages with ease",
|
||||
"main": "compactor.js",
|
||||
"dependencies": {
|
||||
"sync-request": "^3.0.1",
|
||||
"jsdom": "^9.9.1"
|
||||
},
|
||||
"bin": {
|
||||
"monolith": "bin/index.js"
|
||||
},
|
||||
"scripts": {
|
||||
"test": "bin/index.js https://github.com > github.html"
|
||||
},
|
||||
"keywords": [
|
||||
"html5",
|
||||
"monolith",
|
||||
"one-for-all",
|
||||
"all-for-one"
|
||||
],
|
||||
"author": "Y2Z",
|
||||
"license": "GPL-3.0"
|
||||
}
|
Loading…
Reference in a new issue