diff --git a/.eslintrc.json b/.eslintrc.json index d5e4e768..7dcb705c 100755 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -102,6 +102,7 @@ "$": false, "jQuery": false, "log": false, + "app": false, "COMPILE_TIME": false, "COMPILE_MSG": false, diff --git a/package-lock.json b/package-lock.json index 8a35ebb1..ed688844 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5125,12 +5125,14 @@ "balanced-match": { "version": "1.0.0", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "brace-expansion": { "version": "1.1.11", "bundled": true, "dev": true, + "optional": true, "requires": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" @@ -5150,7 +5152,8 @@ "concat-map": { "version": "0.0.1", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "console-control-strings": { "version": "1.1.0", @@ -5298,6 +5301,7 @@ "version": "3.0.4", "bundled": true, "dev": true, + "optional": true, "requires": { "brace-expansion": "^1.1.7" } diff --git a/src/core/Utils.mjs b/src/core/Utils.mjs index f70e2941..8e69b020 100755 --- a/src/core/Utils.mjs +++ b/src/core/Utils.mjs @@ -832,8 +832,9 @@ class Utils { const buff = await Utils.readFile(file); const blob = new Blob( [buff], - {type: "octet/stream"} + {type: file.type || "octet/stream"} ); + const blobURL = URL.createObjectURL(blob); const html = `
@@ -848,10 +849,19 @@ class Utils { ${file.size.toLocaleString()} bytes + href="${blobURL}" + download="${Utils.escapeHtml(file.name)}" + data-toggle="tooltip"> save + + open_in_browser +
@@ -1163,6 +1173,21 @@ String.prototype.count = function(chr) { }; +/** + * Wrapper for self.sendStatusMessage to handle different environments. + * + * @param {string} msg + */ +export function sendStatusMessage(msg) { + if (ENVIRONMENT_IS_WORKER()) + self.sendStatusMessage(msg); + else if (ENVIRONMENT_IS_WEB()) + app.alert(msg, 10000); + else if (ENVIRONMENT_IS_NODE()) + log.debug(msg); +} + + /* * Polyfills */ diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index 8235ab10..337b89d2 100755 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -254,7 +254,8 @@ "XPath expression", "JPath expression", "CSS selector", - "Extract EXIF" + "Extract EXIF", + "Extract Files" ] }, { @@ -348,6 +349,7 @@ "ops": [ "Detect File Type", "Scan for Embedded Files", + "Extract Files", "Remove EXIF", "Extract EXIF" ] diff --git a/src/core/lib/BCD.mjs b/src/core/lib/BCD.mjs old mode 100755 new mode 100644 diff --git a/src/core/lib/Base58.mjs b/src/core/lib/Base58.mjs old mode 100755 new mode 100644 diff --git a/src/core/lib/Base64.mjs b/src/core/lib/Base64.mjs old mode 100755 new mode 100644 diff --git a/src/core/lib/CanvasComponents.mjs b/src/core/lib/CanvasComponents.mjs old mode 100755 new mode 100644 diff --git a/src/core/lib/FileSignatures.mjs b/src/core/lib/FileSignatures.mjs new file mode 100644 index 00000000..61e37b88 --- /dev/null +++ b/src/core/lib/FileSignatures.mjs @@ -0,0 +1,1727 @@ +/** + * File signatures and extractor functions + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + * + */ +import Stream from "./Stream"; + +/** + * A categorised table of file types, including signatures to identify them and functions + * to extract them where possible. + */ +export const FILE_SIGNATURES = { + "Images": [ + { + name: "Joint Photographic Experts Group image", + extension: "jpg,jpeg,jpe,thm,mpo", + mime: "image/jpeg", + description: "", + signature: { + 0: 0xff, + 1: 0xd8, + 2: 0xff, + 3: [0xc0, 0xc4, 0xdb, 0xdd, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe7, 0xe8, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xfe] + }, + extractor: extractJPEG + }, + { + name: "Graphics Interchange Format image", + extension: "gif", + mime: "image/gif", + description: "", + signature: { + 0: 0x47, // GIF + 1: 0x49, + 2: 0x46, + 3: 0x38, // 8 + 4: [0x37, 0x39], // 7|9 + 5: 0x61 // a + }, + extractor: null + }, + { + name: "Portable Network Graphics image", + extension: "png", + mime: "image/png", + description: "", + signature: { + 0: 0x89, + 1: 0x50, // PNG + 2: 0x4e, + 3: 0x47, + 4: 0x0d, + 5: 0x0a, + 6: 0x1a, + 7: 0x0a + }, + extractor: extractPNG + }, + { + name: "WEBP Image", + extension: "webp", + mime: "image/webp", + description: "", + signature: { + 8: 0x57, + 9: 0x45, + 10: 0x42, + 11: 0x50 + }, + extractor: null + }, + { + name: "Camera Image File Format", + extension: "crw", + mime: "image/x-canon-crw", + description: "", + signature: { + 6: 0x48, // HEAPCCDR + 7: 0x45, + 8: 0x41, + 9: 0x50, + 10: 0x43, + 11: 0x43, + 12: 0x44, + 13: 0x52 + }, + extractor: null + }, + { // Place before tiff check + name: "Canon CR2 raw image", + extension: "cr2", + mime: "image/x-canon-cr2", + description: "", + signature: [ + { + 0: 0x49, + 1: 0x49, + 2: 0x2a, + 3: 0x0, + 8: 0x43, + 9: 0x52 + }, + { + 0: 0x4d, + 1: 0x4d, + 2: 0x0, + 3: 0x2a, + 8: 0x43, + 9: 0x52 + } + ], + extractor: null + }, + { + name: "Tagged Image File Format image", + extension: "tif", + mime: "image/tiff", + description: "", + signature: [ + { + 0: 0x49, + 1: 0x49, + 2: 0x2a, + 3: 0x0 + }, + { + 0: 0x4d, + 1: 0x4d, + 2: 0x0, + 3: 0x2a + } + ], + extractor: null + }, + { + name: "Bitmap image", + extension: "bmp", + mime: "image/bmp", + description: "", + signature: { + 0: 0x42, + 1: 0x4d, + 7: 0x0, + 9: 0x0, + 14: [0x0c, 0x28, 0x38, 0x40, 0x6c, 0x7c], + 15: 0x0, + 16: 0x0, + 17: 0x0 + }, + extractor: extractBMP + }, + { + name: "JPEG Extended Range image", + extension: "jxr", + mime: "image/vnd.ms-photo", + description: "", + signature: { + 0: 0x49, + 1: 0x49, + 2: 0xbc + }, + extractor: null + }, + { + name: "Photoshop image", + extension: "psd", + mime: "image/vnd.adobe.photoshop", + description: "", + signature: { + 0: 0x38, + 1: 0x42, + 2: 0x50, + 3: 0x53, + 4: 0x0, + 5: 0x1, + 6: 0x0, + 7: 0x0, + 8: 0x0, + 9: 0x0, + 10: 0x0, + 11: 0x0 + }, + extractor: null + }, + { + name: "Paint Shop Pro image", + extension: "psp", + mime: "image/psp", + description: "", + signature: [ + { + 0: 0x50, // Paint Shop Pro Im + 1: 0x61, + 2: 0x69, + 3: 0x6e, + 4: 0x74, + 5: 0x20, + 6: 0x53, + 7: 0x68, + 8: 0x6f, + 9: 0x70, + 10: 0x20, + 11: 0x50, + 12: 0x72, + 13: 0x6f, + 14: 0x20, + 15: 0x49, + 16: 0x6d + }, + { + 0: 0x7e, + 1: 0x42, + 2: 0x4b, + 3: 0x0 + } + ], + extractor: null + }, + { + name: "Icon image", + extension: "ico", + mime: "image/x-icon", + description: "", + signature: { + 0: 0x0, + 1: 0x0, + 2: 0x1, + 3: 0x0, + 4: [0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15], + 5: 0x0, + 6: [0x10, 0x20, 0x30, 0x40, 0x80], + 7: [0x10, 0x20, 0x30, 0x40, 0x80], + 9: 0x00, + 10: [0x0, 0x1] + }, + extractor: null + } + ], + "Video": [ + { // Place before webm + name: "Matroska Multimedia Container", + extension: "mkv", + mime: "video/x-matroska", + description: "", + signature: { + 31: 0x6d, + 32: 0x61, + 33: 0x74, + 34: 0x72, + 35: 0x6f, + 36: 0x73, + 37: 0x6b, + 38: 0x61 + }, + extractor: null + }, + { + name: "WEBM video", + extension: "webm", + mime: "video/webm", + description: "", + signature: { + 0: 0x1a, + 1: 0x45, + 2: 0xdf, + 3: 0xa3 + }, + extractor: null + }, + { + name: "MPEG-4 video", + extension: "mp4", + mime: "video/mp4", + description: "", + signature: [ + { + 0: 0x0, + 1: 0x0, + 2: 0x0, + 3: [0x18, 0x20], + 4: 0x66, + 5: 0x74, + 6: 0x79, + 7: 0x70 + }, + { + 0: 0x33, // 3gp5 + 1: 0x67, + 2: 0x70, + 3: 0x35 + }, + { + 0: 0x0, + 1: 0x0, + 2: 0x0, + 3: 0x1c, + 4: 0x66, + 5: 0x74, + 6: 0x79, + 7: 0x70, + 8: 0x6d, + 9: 0x70, + 10: 0x34, + 11: 0x32, + 16: 0x6d, // mp41mp42isom + 17: 0x70, + 18: 0x34, + 19: 0x31, + 20: 0x6d, + 21: 0x70, + 22: 0x34, + 23: 0x32, + 24: 0x69, + 25: 0x73, + 26: 0x6f, + 27: 0x6d + } + ], + extractor: null + }, + { + name: "M4V video", + extension: "m4v", + mime: "video/x-m4v", + description: "", + signature: { + 0: 0x0, + 1: 0x0, + 2: 0x0, + 3: 0x1c, + 4: 0x66, + 5: 0x74, + 6: 0x79, + 7: 0x70, + 8: 0x4d, + 9: 0x34, + 10: 0x56 + }, + extractor: null + }, + { + name: "Quicktime video", + extension: "mov", + mime: "video/quicktime", + description: "", + signature: { + 0: 0x0, + 1: 0x0, + 2: 0x0, + 3: 0x14, + 4: 0x66, + 5: 0x74, + 6: 0x79, + 7: 0x70 + }, + extractor: null + }, + { + name: "Audio Video Interleave", + extension: "avi", + mime: "video/x-msvideo", + description: "", + signature: { + 0: 0x52, + 1: 0x49, + 2: 0x46, + 3: 0x46, + 8: 0x41, + 9: 0x56, + 10: 0x49 + }, + extractor: null + }, + { + name: "Windows Media Video", + extension: "wmv", + mime: "video/x-ms-wmv", + description: "", + signature: { + 0: 0x30, + 1: 0x26, + 2: 0xb2, + 3: 0x75, + 4: 0x8e, + 5: 0x66, + 6: 0xcf, + 7: 0x11, + 8: 0xa6, + 9: 0xd9 + }, + extractor: null + }, + { + name: "MPEG video", + extension: "mpg", + mime: "video/mpeg", + description: "", + signature: { + 0: 0x0, + 1: 0x0, + 2: 0x1, + 3: 0xba + }, + extractor: null + }, + { + name: "Flash Video", + extension: "flv", + mime: "video/x-flv", + description: "", + signature: { + 0: 0x46, + 1: 0x4c, + 2: 0x56, + 3: 0x1 + }, + extractor: extractFLV + }, + ], + "Audio": [ + { + name: "Waveform Audio", + extension: "wav", + mime: "audio/x-wav", + description: "", + signature: { + 0: 0x52, + 1: 0x49, + 2: 0x46, + 3: 0x46, + 8: 0x57, + 9: 0x41, + 10: 0x56, + 11: 0x45 + }, + extractor: null + }, + { + name: "OGG audio", + extension: "ogg", + mime: "audio/ogg", + description: "", + signature: { + 0: 0x4f, + 1: 0x67, + 2: 0x67, + 3: 0x53 + }, + extractor: null + }, + { + name: "Musical Instrument Digital Interface audio", + extension: "midi", + mime: "audio/midi", + description: "", + signature: { + 0: 0x4d, + 1: 0x54, + 2: 0x68, + 3: 0x64 + }, + extractor: null + }, + { + name: "MPEG-3 audio", + extension: "mp3", + mime: "audio/mpeg", + description: "", + signature: [ + { + 0: 0x49, + 1: 0x44, + 2: 0x33 + }, + { + 0: 0xff, + 1: 0xfb + } + ], + extractor: null + }, + { + name: "MPEG-4 Part 14 audio", + extension: "m4a", + mime: "audio/m4a", + description: "", + signature: [ + { + 4: 0x66, + 5: 0x74, + 6: 0x79, + 7: 0x70, + 8: 0x4d, + 9: 0x34, + 10: 0x41 + }, + { + 0: 0x4d, + 1: 0x34, + 2: 0x41, + 3: 0x20 + } + ], + extractor: null + }, + { + name: "Free Lossless Audio Codec", + extension: "flac", + mime: "audio/x-flac", + description: "", + signature: { + 0: 0x66, + 1: 0x4c, + 2: 0x61, + 3: 0x43 + }, + extractor: null + }, + { + name: "Adaptive Multi-Rate audio codec", + extension: "amr", + mime: "audio/amr", + description: "", + signature: { + 0: 0x23, + 1: 0x21, + 2: 0x41, + 3: 0x4d, + 4: 0x52, + 5: 0x0a + }, + extractor: null + }, + ], + "Documents": [ + { + name: "Portable Document Format", + extension: "pdf", + mime: "application/pdf", + description: "", + signature: { + 0: 0x25, + 1: 0x50, + 2: 0x44, + 3: 0x46 + }, + extractor: extractPDF + }, + { + name: "PostScript", + extension: "ps", + mime: "application/postscript", + description: "", + signature: { + 0: 0x25, + 1: 0x21 + }, + extractor: null + }, + { + name: "Rich Text Format", + extension: "rtf", + mime: "application/rtf", + description: "", + signature: { + 0: 0x7b, + 1: 0x5c, + 2: 0x72, + 3: 0x74, + 4: 0x66 + }, + extractor: extractRTF + }, + { + name: "Microsoft Office documents/OLE2", + extension: "ole2,doc,xls,dot,ppt,xla,ppa,pps,pot,msi,sdw,db,vsd,msg", + mime: "application/msword,application/vnd.ms-excel,application/vnd.ms-powerpoint", + description: "Microsoft Office documents", + signature: { + 0: 0xd0, + 1: 0xcf, + 2: 0x11, + 3: 0xe0, + 4: 0xa1, + 5: 0xb1, + 6: 0x1a, + 7: 0xe1 + }, + extractor: null + }, + { + name: "Microsoft Office 2007+ documents", + extension: "docx,xlsx,pptx", + mime: "application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.openxmlformats-officedocument.presentationml.presentation", + description: "", + signature: { + 38: 0x5f, // _Types].xml + 39: 0x54, + 40: 0x79, + 41: 0x70, + 42: 0x65, + 43: 0x73, + 44: 0x5d, + 45: 0x2e, + 46: 0x78, + 47: 0x6d, + 48: 0x6c + }, + extractor: extractZIP + }, + { + name: "EPUB e-book", + extension: "epub", + mime: "application/epub+zip", + description: "", + signature: { + 0: 0x50, + 1: 0x4b, + 2: 0x3, + 3: 0x4, + 30: 0x6d, // mimetypeapplication/epub_zip + 31: 0x69, + 32: 0x6d, + 33: 0x65, + 34: 0x74, + 35: 0x79, + 36: 0x70, + 37: 0x65, + 38: 0x61, + 39: 0x70, + 40: 0x70, + 41: 0x6c, + 42: 0x69, + 43: 0x63, + 44: 0x61, + 45: 0x74, + 46: 0x69, + 47: 0x6f, + 48: 0x6e, + 49: 0x2f, + 50: 0x65, + 51: 0x70, + 52: 0x75, + 53: 0x62, + 54: 0x2b, + 55: 0x7a, + 56: 0x69, + 57: 0x70 + }, + extractor: extractZIP + }, + ], + "Applications": [ + { + name: "Windows Portable Executable", + extension: "exe,dll,drv,vxd,sys,ocx,vbx,com,fon,scr", + mime: "application/x-msdownload", + description: "", + signature: { + 0: 0x4d, + 1: 0x5a, + 3: [0x0, 0x1, 0x2], + 5: [0x0, 0x1, 0x2] + }, + extractor: extractMZPE + }, + { + name: "Executable and Linkable Format file", + extension: "elf,bin,axf,o,prx,so", + mime: "application/x-executable", + description: "Executable and Linkable Format file. No standard file extension.", + signature: { + 0: 0x7f, + 1: 0x45, + 2: 0x4c, + 3: 0x46 + }, + extractor: extractELF + }, + { + name: "Adobe Flash", + extension: "swf", + mime: "application/x-shockwave-flash", + description: "", + signature: { + 0: [0x43, 0x46], + 1: 0x57, + 2: 0x53 + }, + extractor: null + }, + { + name: "Java Class", + extension: "class", + mime: "application/java-vm", + description: "", + signature: { + 0: 0xca, + 1: 0xfe, + 2: 0xba, + 3: 0xbe + }, + extractor: null + }, + { + name: "Dalvik Executable", + extension: "dex", + mime: "application/octet-stream", + description: "Dalvik Executable as used by Android", + signature: { + 0: 0x64, + 1: 0x65, + 2: 0x78, + 3: 0x0a, + 4: 0x30, + 5: 0x33, + 6: 0x35, + 7: 0x0 + }, + extractor: null + }, + { + name: "Google Chrome Extension", + extension: "crx", + mime: "application/crx", + description: "Google Chrome extension or packaged app", + signature: { + 0: 0x43, + 1: 0x72, + 2: 0x32, + 3: 0x34 + }, + extractor: null + }, + ], + "Archives": [ + { + name: "PKZIP archive", + extension: "zip", + mime: "application/zip", + description: "", + signature: { + 0: 0x50, + 1: 0x4b, + 2: [0x3, 0x5, 0x7], + 3: [0x4, 0x6, 0x8] + }, + extractor: extractZIP + }, + { + name: "TAR archive", + extension: "tar", + mime: "application/x-tar", + description: "", + signature: { + 257: 0x75, + 258: 0x73, + 259: 0x74, + 260: 0x61, + 261: 0x72 + }, + extractor: null + }, + { + name: "Roshal Archive", + extension: "rar", + mime: "application/x-rar-compressed", + description: "", + signature: { + 0: 0x52, + 1: 0x61, + 2: 0x72, + 3: 0x21, + 4: 0x1a, + 5: 0x7, + 6: [0x0, 0x1] + }, + extractor: null + }, + { + name: "Gzip", + extension: "gz", + mime: "application/gzip", + description: "", + signature: { + 0: 0x1f, + 1: 0x8b, + 2: 0x8 + }, + extractor: extractGZIP + }, + { + name: "Bzip2", + extension: "bz2", + mime: "application/x-bzip2", + description: "", + signature: { + 0: 0x42, + 1: 0x5a, + 2: 0x68 + }, + extractor: null + }, + { + name: "7zip", + extension: "7z", + mime: "application/x-7z-compressed", + description: "", + signature: { + 0: 0x37, + 1: 0x7a, + 2: 0xbc, + 3: 0xaf, + 4: 0x27, + 5: 0x1c + }, + extractor: null + }, + { + name: "Zlib Deflate", + extension: "zlib", + mime: "application/x-deflate", + description: "", + signature: { + 0: 0x78, + 1: [0x1, 0x9c, 0xda, 0x5e] + }, + extractor: extractZlib + }, + { + name: "xz compression", + extension: "xz", + mime: "application/x-xz", + description: "", + signature: { + 0: 0xfd, + 1: 0x37, + 2: 0x7a, + 3: 0x58, + 4: 0x5a, + 5: 0x0 + }, + extractor: null + }, + { + name: "Tarball", + extension: "tar.z", + mime: "application/x-gtar", + description: "", + signature: { + 0: 0x1f, + 1: [0x9d, 0xa0] + }, + extractor: null + }, + { + name: "ISO disk image", + extension: "iso", + mime: "application/octet-stream", + description: "ISO 9660 CD/DVD image file", + signature: [ + { + 0x8001: 0x43, + 0x8002: 0x44, + 0x8003: 0x30, + 0x8004: 0x30, + 0x8005: 0x31 + }, + { + 0x8801: 0x43, + 0x8802: 0x44, + 0x8803: 0x30, + 0x8804: 0x30, + 0x8805: 0x31 + }, + { + 0x9001: 0x43, + 0x9002: 0x44, + 0x9003: 0x30, + 0x9004: 0x30, + 0x9005: 0x31 + } + ], + extractor: null + }, + { + name: "Virtual Machine Disk", + extension: "vmdk", + mime: "application/vmdk,application/x-virtualbox-vmdk", + description: "", + signature: { + 0: 0x4b, + 1: 0x44, + 2: 0x4d + }, + extractor: null + }, + ], + "Miscellaneous": [ + { + name: "UTF-8 text file", + extension: "txt", + mime: "text/plain", + description: "UTF-8 encoded Unicode byte order mark, commonly but not exclusively seen in text files.", + signature: { + 0: 0xef, + 1: 0xbb, + 2: 0xbf + }, + extractor: null + }, + { // Place before UTF-16 LE file + name: "UTF-32 LE file", + extension: "utf32le", + mime: "charset/utf32le", + description: "Little-endian UTF-32 encoded Unicode byte order mark.", + signature: { + 0: 0xff, + 1: 0xfe, + 2: 0x00, + 3: 0x00 + }, + extractor: null + }, + { + name: "UTF-16 LE file", + extension: "utf16le", + mime: "charset/utf16le", + description: "Little-endian UTF-16 encoded Unicode byte order mark.", + signature: { + 0: 0xff, + 1: 0xfe + }, + extractor: null + }, + { + name: "Web Open Font Format", + extension: "woff", + mime: "application/font-woff", + description: "", + signature: { + 0: 0x77, + 1: 0x4f, + 2: 0x46, + 3: 0x46, + 4: 0x0, + 5: 0x1, + 6: 0x0, + 7: 0x0 + }, + extractor: null + }, + { + name: "Web Open Font Format 2", + extension: "woff2", + mime: "application/font-woff", + description: "", + signature: { + 0: 0x77, + 1: 0x4f, + 2: 0x46, + 3: 0x32, + 4: 0x0, + 5: 0x1, + 6: 0x0, + 7: 0x0 + }, + extractor: null + }, + { + name: "Embedded OpenType font", + extension: "eot", + mime: "application/octet-stream", + description: "", + signature: [ + { + 8: 0x2, + 9: 0x0, + 10: 0x1, + 34: 0x4c, + 35: 0x50 + }, + { + 8: 0x1, + 9: 0x0, + 10: 0x0, + 34: 0x4c, + 35: 0x50 + }, + { + 8: 0x2, + 9: 0x0, + 10: 0x2, + 34: 0x4c, + 35: 0x50 + }, + ], + extractor: null + }, + { + name: "TrueType Font", + extension: "ttf", + mime: "application/font-sfnt", + description: "", + signature: { + 0: 0x0, + 1: 0x1, + 2: 0x0, + 3: 0x0, + 4: 0x0 + }, + extractor: null + }, + { + name: "OpenType Font", + extension: "otf", + mime: "application/font-sfnt", + description: "", + signature: { + 0: 0x4f, + 1: 0x54, + 2: 0x54, + 3: 0x4f, + 4: 0x0 + }, + extractor: null + }, + { + name: "SQLite", + extension: "sqlite", + mime: "application/x-sqlite3", + description: "", + signature: { + 0: 0x53, + 1: 0x51, + 2: 0x4c, + 3: 0x69 + }, + extractor: null + }, + ] +}; + + +/** + * JPEG extractor. + * + * @param {Uint8Array} bytes + * @param {number} offset + * @returns {Uint8Array} + */ +export function extractJPEG(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + + while (stream.hasMore()) { + const marker = stream.getBytes(2); + if (marker[0] !== 0xff) throw new Error(`Invalid marker while parsing JPEG at pos ${stream.position}: ${marker}`); + + let segmentSize = 0; + switch (marker[1]) { + // No length + case 0xd8: // Start of Image + case 0x01: // For temporary use in arithmetic coding + break; + case 0xd9: // End found + return stream.carve(); + + // Variable size segment + case 0xc0: // Start of frame (Baseline DCT) + case 0xc1: // Start of frame (Extended sequential DCT) + case 0xc2: // Start of frame (Progressive DCT) + case 0xc3: // Start of frame (Lossless sequential) + case 0xc4: // Define Huffman Table + case 0xc5: // Start of frame (Differential sequential DCT) + case 0xc6: // Start of frame (Differential progressive DCT) + case 0xc7: // Start of frame (Differential lossless) + case 0xc8: // Reserved for JPEG extensions + case 0xc9: // Start of frame (Extended sequential DCT) + case 0xca: // Start of frame (Progressive DCT) + case 0xcb: // Start of frame (Lossless sequential) + case 0xcc: // Define arithmetic conditioning table + case 0xcd: // Start of frame (Differential sequential DCT) + case 0xce: // Start of frame (Differential progressive DCT) + case 0xcf: // Start of frame (Differential lossless) + case 0xdb: // Define Quantization Table + case 0xde: // Define hierarchical progression + case 0xe0: // Application-specific + case 0xe1: // Application-specific + case 0xe2: // Application-specific + case 0xe3: // Application-specific + case 0xe4: // Application-specific + case 0xe5: // Application-specific + case 0xe6: // Application-specific + case 0xe7: // Application-specific + case 0xe8: // Application-specific + case 0xe9: // Application-specific + case 0xea: // Application-specific + case 0xeb: // Application-specific + case 0xec: // Application-specific + case 0xed: // Application-specific + case 0xee: // Application-specific + case 0xef: // Application-specific + case 0xfe: // Comment + segmentSize = stream.readInt(2, "be"); + stream.position += segmentSize - 2; + break; + + // 1 byte + case 0xdf: // Expand reference image + stream.position++; + break; + + // 2 bytes + case 0xdc: // Define number of lines + case 0xdd: // Define restart interval + stream.position += 2; + break; + + // Start scan + case 0xda: // Start of scan + segmentSize = stream.readInt(2, "be"); + stream.position += segmentSize - 2; + stream.continueUntil(0xff); + break; + + // Continue through encoded data + case 0x00: // Byte stuffing + case 0xd0: // Restart + case 0xd1: // Restart + case 0xd2: // Restart + case 0xd3: // Restart + case 0xd4: // Restart + case 0xd5: // Restart + case 0xd6: // Restart + case 0xd7: // Restart + stream.continueUntil(0xff); + break; + + default: + stream.continueUntil(0xff); + break; + } + } + + throw new Error("Unable to parse JPEG successfully"); +} + + +/** + * Portable executable extractor. + * Assumes that the offset refers to an MZ header. + * + * @param {Uint8Array} bytes + * @param {number} offset + * @returns {Uint8Array} + */ +export function extractMZPE(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + + // Move to PE header pointer + stream.moveTo(0x3c); + const peAddress = stream.readInt(4, "le"); + + // Move to PE header + stream.moveTo(peAddress); + + // Get number of sections + stream.moveForwardsBy(6); + const numSections = stream.readInt(2, "le"); + + // Get optional header size + stream.moveForwardsBy(12); + const optionalHeaderSize = stream.readInt(2, "le"); + + // Move past optional header to section header + stream.moveForwardsBy(2 + optionalHeaderSize); + + // Move to final section header + stream.moveForwardsBy((numSections - 1) * 0x28); + + // Get raw data info + stream.moveForwardsBy(16); + const rawDataSize = stream.readInt(4, "le"); + const rawDataAddress = stream.readInt(4, "le"); + + // Move to end of final section + stream.moveTo(rawDataAddress + rawDataSize); + + return stream.carve(); +} + + +/** + * PDF extractor. + * + * @param {Uint8Array} bytes + * @param {number} offset + * @returns {Uint8Array} + */ +export function extractPDF(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + + // Find end-of-file marker (%%EOF) + stream.continueUntil([0x25, 0x25, 0x45, 0x4f, 0x46]); + stream.moveForwardsBy(5); + stream.consumeIf(0x0d); + stream.consumeIf(0x0a); + + return stream.carve(); +} + + +/** + * ZIP extractor. + * + * @param {Uint8Array} bytes + * @param {number} offset + * @returns {Uint8Array} + */ +export function extractZIP(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + + // Find End of central directory record + stream.continueUntil([0x50, 0x4b, 0x05, 0x06]); + + // Get comment length and consume + stream.moveForwardsBy(20); + const commentLength = stream.readInt(2, "le"); + stream.moveForwardsBy(commentLength); + + return stream.carve(); +} + + +/** + * PNG extractor. + * + * @param {Uint8Array} bytes + * @param {number} offset + * @returns {Uint8Array} + */ +export function extractPNG(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + + // Move past signature to first chunk + stream.moveForwardsBy(8); + + let chunkSize = 0, + chunkType = ""; + + while (chunkType !== "IEND") { + chunkSize = stream.readInt(4, "be"); + chunkType = stream.readString(4); + + // Chunk data size + CRC checksum + stream.moveForwardsBy(chunkSize + 4); + } + + + return stream.carve(); +} + + +/** + * BMP extractor. + * + * @param {Uint8Array} bytes + * @param {number} offset + * @returns {Uint8Array} + */ +export function extractBMP(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + + // Move past header + stream.moveForwardsBy(2); + + // Read full file size + const bmpSize = stream.readInt(4, "le"); + + // Move to end of file (file size minus header and size field) + stream.moveForwardsBy(bmpSize - 6); + + return stream.carve(); +} + + +/** + * FLV extractor. + * + * @param {Uint8Array} bytes + * @param {number} offset + * @returns {Uint8Array} + */ +export function extractFLV(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + + // Move past signature, version and flags + stream.moveForwardsBy(5); + + // Read header size + const headerSize = stream.readInt(4, "be"); + + // Skip through the rest of the header + stream.moveForwardsBy(headerSize - 9); + + let tagSize = -11; // Fake size of previous tag header + while (stream.hasMore()) { + const prevTagSize = stream.readInt(4, "be"); + const tagType = stream.readInt(1); + + if ([8, 9, 18].indexOf(tagType) < 0) { + // This tag is not valid + stream.moveBackwardsBy(1); + break; + } + + if (prevTagSize !== (tagSize + 11)) { + // Previous tag was not valid, reverse back over this header + // and the previous tag body and header + stream.moveBackwardsBy(tagSize + 11 + 5); + break; + } + + tagSize = stream.readInt(3, "be"); + + // Move past the rest of the tag header and payload + stream.moveForwardsBy(7 + tagSize); + } + + return stream.carve(); +} + + +/** + * RTF extractor. + * + * @param {Uint8Array} bytes + * @param {number} offset + * @returns {Uint8Array} + */ +export function extractRTF(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + + let openTags = 0; + + if (stream.readInt(1) !== 0x7b) { // { + throw new Error("Not a valid RTF file"); + } else { + openTags++; + } + + while (openTags > 0 && stream.hasMore()) { + switch (stream.readInt(1)) { + case 0x7b: // { + openTags++; + break; + case 0x7d: // } + openTags--; + break; + case 0x5c: // \ + // Consume any more escapes and then skip over the next character + stream.consumeIf(0x5c); + stream.position++; + break; + default: + break; + } + } + + return stream.carve(); +} + + +/** + * GZIP extractor. + * + * @param {Uint8Array} bytes + * @param {number} offset + * @returns {Uint8Array} + */ +export function extractGZIP(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + + + /* HEADER */ + + // Skip over signature and compression method + stream.moveForwardsBy(3); + + // Read flags + const flags = stream.readInt(1); + + // Skip over last modification time + stream.moveForwardsBy(4); + + // Read compression flags + stream.readInt(1); + + // Skip over OS + stream.moveForwardsBy(1); + + + /* OPTIONAL HEADERS */ + + // Extra fields + if (flags & 0x4) { + const extraFieldsSize = stream.readInt(2, "le"); + stream.moveForwardsby(extraFieldsSize); + } + + // Original filename + if (flags & 0x8) { + stream.continueUntil(0x00); + stream.moveForwardsBy(1); + } + + // Comment + if (flags & 0x10) { + stream.continueUntil(0x00); + stream.moveForwardsBy(1); + } + + // Checksum + if (flags & 0x2) { + stream.moveForwardsBy(2); + } + + + /* DEFLATE DATA */ + + parseDEFLATE(stream); + + + /* FOOTER */ + + // Skip over checksum and size of original uncompressed input + stream.moveForwardsBy(8); + + return stream.carve(); +} + + +/** + * Zlib extractor. + * + * @param {Uint8Array} bytes + * @param {number} offset + * @returns {Uint8Array} + */ +export function extractZlib(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + + // Skip over CMF + stream.moveForwardsBy(1); + + // Read flags + const flags = stream.readInt(1); + + // Skip over preset dictionary checksum + if (flags & 0x20) { + stream.moveForwardsBy(4); + } + + // Parse DEFLATE stream + parseDEFLATE(stream); + + // Skip over final checksum + stream.moveForwardsBy(4); + + return stream.carve(); +} + + +/** + * ELF extractor. + * + * @param {Uint8Array} bytes + * @param {number} offset + * @returns {Uint8Array} + */ +export function extractELF(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + + // Skip over magic number + stream.moveForwardsBy(4); + + // Read architecture (x86 == 1, x64 == 2) + const x86 = stream.readInt(1) === 1; + + // Read endianness (1 == little, 2 == big) + const endian = stream.readInt(1) === 1 ? "le" : "be"; + + // Skip over header values + stream.moveForwardsBy(x86 ? 26 : 34); + + // Read section header table offset + const shoff = x86 ? stream.readInt(4, endian) : stream.readInt(8, endian); + + // Skip over flags, header size and program header size and entries + stream.moveForwardsBy(10); + + // Read section header table entry size + const shentsize = stream.readInt(2, endian); + + // Read number of entries in the section header table + const shnum = stream.readInt(2, endian); + + // Jump to section header table + stream.moveTo(shoff); + + // Move past each section header + stream.moveForwardsBy(shentsize * shnum); + + return stream.carve(); +} + + +// Construct required Huffman Tables +const fixedLiteralTableLengths = new Array(288); +for (let i = 0; i < fixedLiteralTableLengths.length; i++) { + fixedLiteralTableLengths[i] = + (i <= 143) ? 8 : + (i <= 255) ? 9 : + (i <= 279) ? 7 : + 8; +} +const fixedLiteralTable = buildHuffmanTable(fixedLiteralTableLengths); +const fixedDistanceTableLengths = new Array(30).fill(5); +const fixedDistanceTable = buildHuffmanTable(fixedDistanceTableLengths); +const huffmanOrder = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]; + +/** + * Steps through a DEFLATE stream + * + * @param {Stream} stream + */ +function parseDEFLATE(stream) { + // Parse DEFLATE data + let finalBlock = 0; + + while (!finalBlock) { + // Read header + finalBlock = stream.readBits(1); + const blockType = stream.readBits(2); + + if (blockType === 0) { + /* No compression */ + + // Consume the rest of the current byte + stream.moveForwardsBy(1); + // Read the block length value + const blockLength = stream.readInt(2, "le"); + // Move to the end of this block + stream.moveForwardsBy(2 + blockLength); + } else if (blockType === 1) { + /* Fixed Huffman */ + + parseHuffmanBlock(stream, fixedLiteralTable, fixedDistanceTable); + } else if (blockType === 2) { + /* Dynamic Huffman */ + + // Read the number of liternal and length codes + const hlit = stream.readBits(5) + 257; + // Read the number of distance codes + const hdist = stream.readBits(5) + 1; + // Read the number of code lengths + const hclen = stream.readBits(4) + 4; + + // Parse code lengths + const codeLengths = new Uint8Array(huffmanOrder.length); + for (let i = 0; i < hclen; i++) { + codeLengths[huffmanOrder[i]] = stream.readBits(3); + } + + // Parse length table + const codeLengthsTable = buildHuffmanTable(codeLengths); + const lengthTable = new Uint8Array(hlit + hdist); + + let code, repeat, prev; + for (let i = 0; i < hlit + hdist;) { + code = readHuffmanCode(stream, codeLengthsTable); + switch (code) { + case 16: + repeat = 3 + stream.readBits(2); + while (repeat--) lengthTable[i++] = prev; + break; + case 17: + repeat = 3 + stream.readBits(3); + while (repeat--) lengthTable[i++] = 0; + prev = 0; + break; + case 18: + repeat = 11 + stream.readBits(7); + while (repeat--) lengthTable[i++] = 0; + prev = 0; + break; + default: + lengthTable[i++] = code; + prev = code; + break; + } + } + + const dynamicLiteralTable = buildHuffmanTable(lengthTable.subarray(0, hlit)); + const dynamicDistanceTable = buildHuffmanTable(lengthTable.subarray(hlit)); + + parseHuffmanBlock(stream, dynamicLiteralTable, dynamicDistanceTable); + } else { + throw new Error(`Invalid block type while parsing DEFLATE stream at pos ${stream.position}`); + } + } + + // Consume final byte if it has not been fully consumed yet + if (stream.bitPos > 0) + stream.moveForwardsBy(1); +} + + +// Static length tables +const lengthExtraTable = [ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0 +]; +const distanceExtraTable = [ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 +]; + +/** + * Parses a Huffman Block given the literal and distance tables + * + * @param {Stream} stream + * @param {Uint32Array} litTab + * @param {Uint32Array} distTab + */ +function parseHuffmanBlock(stream, litTab, distTab) { + let code; + let loops = 0; + while ((code = readHuffmanCode(stream, litTab))) { + // console.log("Code: " + code + " (" + Utils.chr(code) + ") " + Utils.bin(code)); + + // End of block + if (code === 256) break; + + // Detect probably infinite loops + if (++loops > 10000) + throw new Error("Caught in probable infinite loop while parsing Huffman Block"); + + // Literal + if (code < 256) continue; + + // Length code + stream.readBits(lengthExtraTable[code - 257]); + + // Dist code + code = readHuffmanCode(stream, distTab); + stream.readBits(distanceExtraTable[code]); + } +} + + +/** + * Builds a Huffman table given the relevant code lengths + * + * @param {Array} lengths + * @returns {Array} result + * @returns {Uint32Array} result.table + * @returns {number} result.maxCodeLength + * @returns {number} result.minCodeLength + */ +function buildHuffmanTable(lengths) { + const maxCodeLength = Math.max.apply(Math, lengths); + const minCodeLength = Math.min.apply(Math, lengths); + const size = 1 << maxCodeLength; + const table = new Uint32Array(size); + + for (let bitLength = 1, code = 0, skip = 2; bitLength <= maxCodeLength;) { + for (let i = 0; i < lengths.length; i++) { + if (lengths[i] === bitLength) { + let reversed, rtemp, j; + for (reversed = 0, rtemp = code, j = 0; j < bitLength; j++) { + reversed = (reversed << 1) | (rtemp & 1); + rtemp >>= 1; + } + + const value = (bitLength << 16) | i; + for (let j = reversed; j < size; j += skip) { + table[j] = value; + } + + code++; + } + } + + bitLength++; + code <<= 1; + skip <<= 1; + } + + return [table, maxCodeLength, minCodeLength]; +} + + +/** + * Reads the next Huffman code from the stream, given the relevant code table + * + * @param {Stream} stream + * @param {Uint32Array} table + * @returns {number} + */ +function readHuffmanCode(stream, table) { + const [codeTable, maxCodeLength] = table; + + // Read max length + const bitsBuf = stream.readBits(maxCodeLength); + const codeWithLength = codeTable[bitsBuf & ((1 << maxCodeLength) - 1)]; + const codeLength = codeWithLength >>> 16; + + if (codeLength > maxCodeLength) { + throw new Error(`Invalid Huffman Code length while parsing DEFLATE block at pos ${stream.position}: ${codeLength}`); + } + + stream.moveBackwardsByBits(maxCodeLength - codeLength); + + return codeWithLength & 0xffff; +} diff --git a/src/core/lib/FileType.mjs b/src/core/lib/FileType.mjs new file mode 100644 index 00000000..e961a76f --- /dev/null +++ b/src/core/lib/FileType.mjs @@ -0,0 +1,263 @@ +/** + * File type functions + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + * + */ +import {FILE_SIGNATURES} from "./FileSignatures"; +import {sendStatusMessage} from "../Utils"; + + +/** + * Checks whether a signature matches a buffer. + * + * @param {Object|Object[]} sig - A dictionary of offsets with values assigned to them. + * These values can be numbers for static checks, arrays of potential valid matches, + * or bespoke functions to check the validity of the buffer value at that offset. + * @param {Uint8Array} buf + * @param {number} [offset=0] Where in the buffer to start searching from + * @returns {boolean} + */ +function signatureMatches(sig, buf, offset=0) { + // Using a length check seems to be more performant than `sig instanceof Array` + if (sig.length) { + // sig is an Array - return true if any of them match + // The following `reduce` method is nice, but performance matters here, so we + // opt for a faster, if less elegant, for loop. + // return sig.reduce((acc, s) => acc || bytesMatch(s, buf, offset), false); + for (let i = 0; i < sig.length; i++) { + if (bytesMatch(sig[i], buf, offset)) return true; + } + return false; + } else { + return bytesMatch(sig, buf, offset); + } +} + + +/** + * Checks whether a set of bytes match the given buffer. + * + * @param {Object} sig - A dictionary of offsets with values assigned to them. + * These values can be numbers for static checks, arrays of potential valid matches, + * or bespoke functions to check the validity of the buffer value at that offset. + * @param {Uint8Array} buf + * @param {number} [offset=0] Where in the buffer to start searching from + * @returns {boolean} + */ +function bytesMatch(sig, buf, offset=0) { + for (const sigoffset in sig) { + const pos = parseInt(sigoffset, 10) + offset; + switch (typeof sig[sigoffset]) { + case "number": // Static check + if (buf[pos] !== sig[sigoffset]) + return false; + break; + case "object": // Array of options + if (sig[sigoffset].indexOf(buf[pos]) < 0) + return false; + break; + case "function": // More complex calculation + if (!sig[sigoffset](buf[pos])) + return false; + break; + default: + throw new Error(`Unrecognised signature type at offset ${sigoffset}`); + } + } + return true; +} + + +/** + * Given a buffer, detects magic byte sequences at specific positions and returns the + * extension and mime type. + * + * @param {Uint8Array} buf + * @param {string[]} [categories=All] - Which categories of file to look for + * @returns {Object[]} types + * @returns {string} type.name - Name of file type + * @returns {string} type.ext - File extension + * @returns {string} type.mime - Mime type + * @returns {string} [type.desc] - Description + */ +export function detectFileType(buf, categories=Object.keys(FILE_SIGNATURES)) { + if (!(buf && buf.length > 1)) { + return []; + } + + const matchingFiles = []; + const signatures = {}; + + for (const cat in FILE_SIGNATURES) { + if (categories.includes(cat)) { + signatures[cat] = FILE_SIGNATURES[cat]; + } + } + + for (const cat in signatures) { + const category = signatures[cat]; + + category.forEach(filetype => { + if (signatureMatches(filetype.signature, buf)) { + matchingFiles.push(filetype); + } + }); + } + return matchingFiles; +} + + +/** + * Given a buffer, searches for magic byte sequences at all possible positions and returns + * the extensions and mime types. + * + * @param {Uint8Array} buf + * @param {string[]} [categories=All] - Which categories of file to look for + * @returns {Object[]} foundFiles + * @returns {number} foundFiles.offset - The position in the buffer at which this file was found + * @returns {Object} foundFiles.fileDetails + * @returns {string} foundFiles.fileDetails.name - Name of file type + * @returns {string} foundFiles.fileDetails.ext - File extension + * @returns {string} foundFiles.fileDetails.mime - Mime type + * @returns {string} [foundFiles.fileDetails.desc] - Description + */ +export function scanForFileTypes(buf, categories=Object.keys(FILE_SIGNATURES)) { + if (!(buf && buf.length > 1)) { + return []; + } + + const foundFiles = []; + const signatures = {}; + + for (const cat in FILE_SIGNATURES) { + if (categories.includes(cat)) { + signatures[cat] = FILE_SIGNATURES[cat]; + } + } + + for (const cat in signatures) { + const category = signatures[cat]; + + for (let i = 0; i < category.length; i++) { + const filetype = category[i]; + const sigs = filetype.signature.length ? filetype.signature : [filetype.signature]; + + sigs.forEach(sig => { + let pos = 0; + while ((pos = locatePotentialSig(buf, sig, pos)) >= 0) { + if (bytesMatch(sig, buf, pos)) { + sendStatusMessage(`Found potential signature for ${filetype.name} at pos ${pos}`); + foundFiles.push({ + offset: pos, + fileDetails: filetype + }); + } + pos++; + } + }); + } + } + + // Return found files in order of increasing offset + return foundFiles.sort((a, b) => { + return a.offset - b.offset; + }); +} + + +/** + * Fastcheck function to quickly scan the buffer for the first byte in a signature. + * + * @param {Uint8Array} buf - The buffer to search + * @param {Object} sig - A single signature object (Not an array of signatures) + * @param {number} offset - Where to start search from + * @returs {number} The position of the match or -1 if one cannot be found. + */ +function locatePotentialSig(buf, sig, offset) { + // Find values for first key and value in sig + const k = parseInt(Object.keys(sig)[0], 10); + const v = Object.values(sig)[0]; + switch (typeof v) { + case "number": + return buf.indexOf(v, offset + k) - k; + case "object": + for (let i = offset + k; i < buf.length; i++) { + if (v.indexOf(buf[i]) >= 0) return i - k; + } + return -1; + case "function": + for (let i = offset + k; i < buf.length; i++) { + if (v(buf[i])) return i - k; + } + return -1; + default: + throw new Error("Unrecognised signature type"); + } +} + + +/** + * Detects whether the given buffer is a file of the type specified. + * + * @param {string|RegExp} type + * @param {Uint8Array} buf + * @returns {string|false} The mime type or false if the type does not match + */ +export function isType(type, buf) { + const types = detectFileType(buf); + + if (!(types && types.length)) return false; + + if (typeof type === "string") { + return types.reduce((acc, t) => { + const mime = t.mime.startsWith(type) ? t.mime : false; + return acc || mime; + }, false); + } else if (type instanceof RegExp) { + return types.reduce((acc, t) => { + const mime = type.test(t.mime) ? t.mime : false; + return acc || mime; + }, false); + } else { + throw new Error("Invalid type input."); + } +} + + +/** + * Detects whether the given buffer contains an image file. + * + * @param {Uint8Array} buf + * @returns {string|false} The mime type or false if the type does not match + */ +export function isImage(buf) { + return isType("image", buf); +} + + +/** + * Attempts to extract a file from a data stream given its offset and extractor function. + * + * @param {Uint8Array} bytes + * @param {Object} fileDetail + * @param {string} fileDetail.mime + * @param {string} fileDetail.extension + * @param {Function} fileDetail.extractor + * @param {number} offset + * @returns {File} + */ +export function extractFile(bytes, fileDetail, offset) { + if (fileDetail.extractor) { + sendStatusMessage(`Attempting to extract ${fileDetail.name} at pos ${offset}...`); + const fileData = fileDetail.extractor(bytes, offset); + const ext = fileDetail.extension.split(",")[0]; + return new File([fileData], `extracted_at_0x${offset.toString(16)}.${ext}`, { + type: fileDetail.mime + }); + } + + throw new Error(`No extraction algorithm available for "${fileDetail.mime}" files`); +} diff --git a/src/core/lib/Magic.mjs b/src/core/lib/Magic.mjs index 00132d5d..f0b55857 100644 --- a/src/core/lib/Magic.mjs +++ b/src/core/lib/Magic.mjs @@ -2,6 +2,7 @@ import OperationConfig from "../config/OperationConfig.json"; import Utils from "../Utils"; import Recipe from "../Recipe"; import Dish from "../Dish"; +import {detectFileType} from "./FileType"; import chiSquared from "chi-squared"; /** @@ -92,7 +93,14 @@ class Magic { * @returns {string} [type.desc] - Description */ detectFileType() { - return Magic.magicFileType(this.inputBuffer); + const fileType = detectFileType(this.inputBuffer); + + if (!fileType.length) return null; + return { + ext: fileType[0].extension, + mime: fileType[0].mime, + desc: fileType[0].description + }; } /** @@ -785,452 +793,9 @@ class Magic { }[code]; } - - /** - * Given a buffer, detects magic byte sequences at specific positions and returns the - * extension and mime type. - * - * @param {Uint8Array} buf - * @returns {Object} type - * @returns {string} type.ext - File extension - * @returns {string} type.mime - Mime type - * @returns {string} [type.desc] - Description - */ - static magicFileType(buf) { - if (!(buf && buf.length > 1)) { - return null; - } - - if (buf[0] === 0xFF && buf[1] === 0xD8 && buf[2] === 0xFF) { - return { - ext: "jpg", - mime: "image/jpeg" - }; - } - - if (buf[0] === 0x89 && buf[1] === 0x50 && buf[2] === 0x4E && buf[3] === 0x47) { - return { - ext: "png", - mime: "image/png" - }; - } - - if (buf[0] === 0x47 && buf[1] === 0x49 && buf[2] === 0x46) { - return { - ext: "gif", - mime: "image/gif" - }; - } - - if (buf[8] === 0x57 && buf[9] === 0x45 && buf[10] === 0x42 && buf[11] === 0x50) { - return { - ext: "webp", - mime: "image/webp" - }; - } - - // needs to be before `tif` check - if (((buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0x2A && buf[3] === 0x0) || (buf[0] === 0x4D && buf[1] === 0x4D && buf[2] === 0x0 && buf[3] === 0x2A)) && buf[8] === 0x43 && buf[9] === 0x52) { - return { - ext: "cr2", - mime: "image/x-canon-cr2" - }; - } - - if ((buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0x2A && buf[3] === 0x0) || (buf[0] === 0x4D && buf[1] === 0x4D && buf[2] === 0x0 && buf[3] === 0x2A)) { - return { - ext: "tif", - mime: "image/tiff" - }; - } - - if (buf[0] === 0x42 && buf[1] === 0x4D) { - return { - ext: "bmp", - mime: "image/bmp" - }; - } - - if (buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0xBC) { - return { - ext: "jxr", - mime: "image/vnd.ms-photo" - }; - } - - if (buf[0] === 0x38 && buf[1] === 0x42 && buf[2] === 0x50 && buf[3] === 0x53) { - return { - ext: "psd", - mime: "image/vnd.adobe.photoshop" - }; - } - - // needs to be before `zip` check - if (buf[0] === 0x50 && buf[1] === 0x4B && buf[2] === 0x3 && buf[3] === 0x4 && buf[30] === 0x6D && buf[31] === 0x69 && buf[32] === 0x6D && buf[33] === 0x65 && buf[34] === 0x74 && buf[35] === 0x79 && buf[36] === 0x70 && buf[37] === 0x65 && buf[38] === 0x61 && buf[39] === 0x70 && buf[40] === 0x70 && buf[41] === 0x6C && buf[42] === 0x69 && buf[43] === 0x63 && buf[44] === 0x61 && buf[45] === 0x74 && buf[46] === 0x69 && buf[47] === 0x6F && buf[48] === 0x6E && buf[49] === 0x2F && buf[50] === 0x65 && buf[51] === 0x70 && buf[52] === 0x75 && buf[53] === 0x62 && buf[54] === 0x2B && buf[55] === 0x7A && buf[56] === 0x69 && buf[57] === 0x70) { - return { - ext: "epub", - mime: "application/epub+zip" - }; - } - - if (buf[0] === 0x50 && buf[1] === 0x4B && (buf[2] === 0x3 || buf[2] === 0x5 || buf[2] === 0x7) && (buf[3] === 0x4 || buf[3] === 0x6 || buf[3] === 0x8)) { - return { - ext: "zip", - mime: "application/zip" - }; - } - - if (buf[257] === 0x75 && buf[258] === 0x73 && buf[259] === 0x74 && buf[260] === 0x61 && buf[261] === 0x72) { - return { - ext: "tar", - mime: "application/x-tar" - }; - } - - if (buf[0] === 0x52 && buf[1] === 0x61 && buf[2] === 0x72 && buf[3] === 0x21 && buf[4] === 0x1A && buf[5] === 0x7 && (buf[6] === 0x0 || buf[6] === 0x1)) { - return { - ext: "rar", - mime: "application/x-rar-compressed" - }; - } - - if (buf[0] === 0x1F && buf[1] === 0x8B && buf[2] === 0x8) { - return { - ext: "gz", - mime: "application/gzip" - }; - } - - if (buf[0] === 0x42 && buf[1] === 0x5A && buf[2] === 0x68) { - return { - ext: "bz2", - mime: "application/x-bzip2" - }; - } - - if (buf[0] === 0x37 && buf[1] === 0x7A && buf[2] === 0xBC && buf[3] === 0xAF && buf[4] === 0x27 && buf[5] === 0x1C) { - return { - ext: "7z", - mime: "application/x-7z-compressed" - }; - } - - if (buf[0] === 0x78 && buf[1] === 0x01) { - return { - ext: "dmg, zlib", - mime: "application/x-apple-diskimage, application/x-deflate" - }; - } - - if ((buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && (buf[3] === 0x18 || buf[3] === 0x20) && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70) || (buf[0] === 0x33 && buf[1] === 0x67 && buf[2] === 0x70 && buf[3] === 0x35) || (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x1C && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x6D && buf[9] === 0x70 && buf[10] === 0x34 && buf[11] === 0x32 && buf[16] === 0x6D && buf[17] === 0x70 && buf[18] === 0x34 && buf[19] === 0x31 && buf[20] === 0x6D && buf[21] === 0x70 && buf[22] === 0x34 && buf[23] === 0x32 && buf[24] === 0x69 && buf[25] === 0x73 && buf[26] === 0x6F && buf[27] === 0x6D)) { - return { - ext: "mp4", - mime: "video/mp4" - }; - } - - if ((buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x1C && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x4D && buf[9] === 0x34 && buf[10] === 0x56)) { - return { - ext: "m4v", - mime: "video/x-m4v" - }; - } - - if (buf[0] === 0x4D && buf[1] === 0x54 && buf[2] === 0x68 && buf[3] === 0x64) { - return { - ext: "mid", - mime: "audio/midi" - }; - } - - // needs to be before the `webm` check - if (buf[31] === 0x6D && buf[32] === 0x61 && buf[33] === 0x74 && buf[34] === 0x72 && buf[35] === 0x6f && buf[36] === 0x73 && buf[37] === 0x6B && buf[38] === 0x61) { - return { - ext: "mkv", - mime: "video/x-matroska" - }; - } - - if (buf[0] === 0x1A && buf[1] === 0x45 && buf[2] === 0xDF && buf[3] === 0xA3) { - return { - ext: "webm", - mime: "video/webm" - }; - } - - if (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x14 && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70) { - return { - ext: "mov", - mime: "video/quicktime" - }; - } - - if (buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46 && buf[8] === 0x41 && buf[9] === 0x56 && buf[10] === 0x49) { - return { - ext: "avi", - mime: "video/x-msvideo" - }; - } - - if (buf[0] === 0x30 && buf[1] === 0x26 && buf[2] === 0xB2 && buf[3] === 0x75 && buf[4] === 0x8E && buf[5] === 0x66 && buf[6] === 0xCF && buf[7] === 0x11 && buf[8] === 0xA6 && buf[9] === 0xD9) { - return { - ext: "wmv", - mime: "video/x-ms-wmv" - }; - } - - if (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x1 && buf[3].toString(16)[0] === "b") { - return { - ext: "mpg", - mime: "video/mpeg" - }; - } - - if ((buf[0] === 0x49 && buf[1] === 0x44 && buf[2] === 0x33) || (buf[0] === 0xFF && buf[1] === 0xfb)) { - return { - ext: "mp3", - mime: "audio/mpeg" - }; - } - - if ((buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x4D && buf[9] === 0x34 && buf[10] === 0x41) || (buf[0] === 0x4D && buf[1] === 0x34 && buf[2] === 0x41 && buf[3] === 0x20)) { - return { - ext: "m4a", - mime: "audio/m4a" - }; - } - - if (buf[0] === 0x4F && buf[1] === 0x67 && buf[2] === 0x67 && buf[3] === 0x53) { - return { - ext: "ogg", - mime: "audio/ogg" - }; - } - - if (buf[0] === 0x66 && buf[1] === 0x4C && buf[2] === 0x61 && buf[3] === 0x43) { - return { - ext: "flac", - mime: "audio/x-flac" - }; - } - - if (buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46 && buf[8] === 0x57 && buf[9] === 0x41 && buf[10] === 0x56 && buf[11] === 0x45) { - return { - ext: "wav", - mime: "audio/x-wav" - }; - } - - if (buf[0] === 0x23 && buf[1] === 0x21 && buf[2] === 0x41 && buf[3] === 0x4D && buf[4] === 0x52 && buf[5] === 0x0A) { - return { - ext: "amr", - mime: "audio/amr" - }; - } - - if (buf[0] === 0x25 && buf[1] === 0x50 && buf[2] === 0x44 && buf[3] === 0x46) { - return { - ext: "pdf", - mime: "application/pdf" - }; - } - - if (buf[0] === 0x4D && buf[1] === 0x5A) { - return { - ext: "exe", - mime: "application/x-msdownload" - }; - } - - if ((buf[0] === 0x43 || buf[0] === 0x46) && buf[1] === 0x57 && buf[2] === 0x53) { - return { - ext: "swf", - mime: "application/x-shockwave-flash" - }; - } - - if (buf[0] === 0x7B && buf[1] === 0x5C && buf[2] === 0x72 && buf[3] === 0x74 && buf[4] === 0x66) { - return { - ext: "rtf", - mime: "application/rtf" - }; - } - - if (buf[0] === 0x77 && buf[1] === 0x4F && buf[2] === 0x46 && buf[3] === 0x46 && buf[4] === 0x00 && buf[5] === 0x01 && buf[6] === 0x00 && buf[7] === 0x00) { - return { - ext: "woff", - mime: "application/font-woff" - }; - } - - if (buf[0] === 0x77 && buf[1] === 0x4F && buf[2] === 0x46 && buf[3] === 0x32 && buf[4] === 0x00 && buf[5] === 0x01 && buf[6] === 0x00 && buf[7] === 0x00) { - return { - ext: "woff2", - mime: "application/font-woff" - }; - } - - if (buf[34] === 0x4C && buf[35] === 0x50 && ((buf[8] === 0x02 && buf[9] === 0x00 && buf[10] === 0x01) || (buf[8] === 0x01 && buf[9] === 0x00 && buf[10] === 0x00) || (buf[8] === 0x02 && buf[9] === 0x00 && buf[10] === 0x02))) { - return { - ext: "eot", - mime: "application/octet-stream" - }; - } - - if (buf[0] === 0x00 && buf[1] === 0x01 && buf[2] === 0x00 && buf[3] === 0x00 && buf[4] === 0x00) { - return { - ext: "ttf", - mime: "application/font-sfnt" - }; - } - - if (buf[0] === 0x4F && buf[1] === 0x54 && buf[2] === 0x54 && buf[3] === 0x4F && buf[4] === 0x00) { - return { - ext: "otf", - mime: "application/font-sfnt" - }; - } - - if (buf[0] === 0x00 && buf[1] === 0x00 && buf[2] === 0x01 && buf[3] === 0x00) { - return { - ext: "ico", - mime: "image/x-icon" - }; - } - - if (buf[0] === 0x46 && buf[1] === 0x4C && buf[2] === 0x56 && buf[3] === 0x01) { - return { - ext: "flv", - mime: "video/x-flv" - }; - } - - if (buf[0] === 0x25 && buf[1] === 0x21) { - return { - ext: "ps", - mime: "application/postscript" - }; - } - - if (buf[0] === 0xFD && buf[1] === 0x37 && buf[2] === 0x7A && buf[3] === 0x58 && buf[4] === 0x5A && buf[5] === 0x00) { - return { - ext: "xz", - mime: "application/x-xz" - }; - } - - if (buf[0] === 0x53 && buf[1] === 0x51 && buf[2] === 0x4C && buf[3] === 0x69) { - return { - ext: "sqlite", - mime: "application/x-sqlite3" - }; - } - - /** - * - * Added by n1474335 [n1474335@gmail.com] from here on - * - */ - if ((buf[0] === 0x1F && buf[1] === 0x9D) || (buf[0] === 0x1F && buf[1] === 0xA0)) { - return { - ext: "z, tar.z", - mime: "application/x-gtar" - }; - } - - if (buf[0] === 0x7F && buf[1] === 0x45 && buf[2] === 0x4C && buf[3] === 0x46) { - return { - ext: "none, axf, bin, elf, o, prx, puff, so", - mime: "application/x-executable", - desc: "Executable and Linkable Format file. No standard file extension." - }; - } - - if (buf[0] === 0xCA && buf[1] === 0xFE && buf[2] === 0xBA && buf[3] === 0xBE) { - return { - ext: "class", - mime: "application/java-vm" - }; - } - - if (buf[0] === 0xEF && buf[1] === 0xBB && buf[2] === 0xBF) { - return { - ext: "txt", - mime: "text/plain", - desc: "UTF-8 encoded Unicode byte order mark detected, commonly but not exclusively seen in text files." - }; - } - - // Must be before Little-endian UTF-16 BOM - if (buf[0] === 0xFF && buf[1] === 0xFE && buf[2] === 0x00 && buf[3] === 0x00) { - return { - ext: "UTF32LE", - mime: "charset/utf32le", - desc: "Little-endian UTF-32 encoded Unicode byte order mark detected." - }; - } - - if (buf[0] === 0xFF && buf[1] === 0xFE) { - return { - ext: "UTF16LE", - mime: "charset/utf16le", - desc: "Little-endian UTF-16 encoded Unicode byte order mark detected." - }; - } - - if ((buf[0x8001] === 0x43 && buf[0x8002] === 0x44 && buf[0x8003] === 0x30 && buf[0x8004] === 0x30 && buf[0x8005] === 0x31) || - (buf[0x8801] === 0x43 && buf[0x8802] === 0x44 && buf[0x8803] === 0x30 && buf[0x8804] === 0x30 && buf[0x8805] === 0x31) || - (buf[0x9001] === 0x43 && buf[0x9002] === 0x44 && buf[0x9003] === 0x30 && buf[0x9004] === 0x30 && buf[0x9005] === 0x31)) { - return { - ext: "iso", - mime: "application/octet-stream", - desc: "ISO 9660 CD/DVD image file" - }; - } - - if (buf[0] === 0xD0 && buf[1] === 0xCF && buf[2] === 0x11 && buf[3] === 0xE0 && buf[4] === 0xA1 && buf[5] === 0xB1 && buf[6] === 0x1A && buf[7] === 0xE1) { - return { - ext: "doc, xls, ppt", - mime: "application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint", - desc: "Microsoft Office documents" - }; - } - - if (buf[0] === 0x64 && buf[1] === 0x65 && buf[2] === 0x78 && buf[3] === 0x0A && buf[4] === 0x30 && buf[5] === 0x33 && buf[6] === 0x35 && buf[7] === 0x00) { - return { - ext: "dex", - mime: "application/octet-stream", - desc: "Dalvik Executable (Android)" - }; - } - - if (buf[0] === 0x4B && buf[1] === 0x44 && buf[2] === 0x4D) { - return { - ext: "vmdk", - mime: "application/vmdk, application/x-virtualbox-vmdk" - }; - } - - if (buf[0] === 0x43 && buf[1] === 0x72 && buf[2] === 0x32 && buf[3] === 0x34) { - return { - ext: "crx", - mime: "application/crx", - desc: "Google Chrome extension or packaged app" - }; - } - - if (buf[0] === 0x78 && (buf[1] === 0x01 || buf[1] === 0x9C || buf[1] === 0xDA || buf[1] === 0x5e)) { - return { - ext: "zlib", - mime: "application/x-deflate" - }; - } - - return null; - } - } + /** * Byte frequencies of various languages generated from Wikipedia dumps taken in late 2017 and early 2018. * The Chi-Squared test cannot accept expected values of 0, so 0.0001 has been used to account for bytes diff --git a/src/core/lib/Stream.mjs b/src/core/lib/Stream.mjs new file mode 100644 index 00000000..7e82a5eb --- /dev/null +++ b/src/core/lib/Stream.mjs @@ -0,0 +1,263 @@ +/** + * Stream class for parsing binary protocols. + * + * @author n1474335 [n1474335@gmail.com] + * @author tlwr [toby@toby.codes] + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + * + */ + +/** + * A Stream can be used to traverse a binary blob, interpreting sections of it + * as various data types. + */ +export default class Stream { + + /** + * Stream constructor. + * + * @param {Uint8Array} input + */ + constructor(input) { + this.bytes = input; + this.length = this.bytes.length; + this.position = 0; + this.bitPos = 0; + } + + /** + * Get a number of bytes from the current position. + * + * @param {number} numBytes + * @returns {Uint8Array} + */ + getBytes(numBytes) { + if (this.position > this.length) return undefined; + + const newPosition = this.position + numBytes; + const bytes = this.bytes.slice(this.position, newPosition); + this.position = newPosition; + this.bitPos = 0; + return bytes; + } + + /** + * Interpret the following bytes as a string, stopping at the next null byte or + * the supplied limit. + * + * @param {number} numBytes + * @returns {string} + */ + readString(numBytes) { + if (this.position > this.length) return undefined; + + let result = ""; + for (let i = this.position; i < this.position + numBytes; i++) { + const currentByte = this.bytes[i]; + if (currentByte === 0) break; + result += String.fromCharCode(currentByte); + } + this.position += numBytes; + this.bitPos = 0; + return result; + } + + /** + * Interpret the following bytes as an integer in big or little endian. + * + * @param {number} numBytes + * @param {string} [endianness="be"] + * @returns {number} + */ + readInt(numBytes, endianness="be") { + if (this.position > this.length) return undefined; + + let val = 0; + if (endianness === "be") { + for (let i = this.position; i < this.position + numBytes; i++) { + val = val << 8; + val |= this.bytes[i]; + } + } else { + for (let i = this.position + numBytes - 1; i >= this.position; i--) { + val = val << 8; + val |= this.bytes[i]; + } + } + this.position += numBytes; + this.bitPos = 0; + return val; + } + + /** + * Reads a number of bits from the buffer. + * + * @TODO Add endianness + * + * @param {number} numBits + * @returns {number} + */ + readBits(numBits) { + if (this.position > this.length) return undefined; + + let bitBuf = 0, + bitBufLen = 0; + + // Add remaining bits from current byte + bitBuf = (this.bytes[this.position++] & bitMask(this.bitPos)) >>> this.bitPos; + bitBufLen = 8 - this.bitPos; + this.bitPos = 0; + + // Not enough bits yet + while (bitBufLen < numBits) { + bitBuf |= this.bytes[this.position++] << bitBufLen; + bitBufLen += 8; + } + + // Reverse back to numBits + if (bitBufLen > numBits) { + const excess = bitBufLen - numBits; + bitBuf &= (1 << numBits) - 1; + bitBufLen -= excess; + this.position--; + this.bitPos = 8 - excess; + } + + return bitBuf; + + /** + * Calculates the bit mask based on the current bit position. + * + * @param {number} bitPos + * @returns {number} The bit mask + */ + function bitMask(bitPos) { + return 256 - (1 << bitPos); + } + } + + /** + * Consume the stream until we reach the specified byte or sequence of bytes. + * + * @param {number|List} val + */ + continueUntil(val) { + if (this.position > this.length) return; + + this.bitPos = 0; + + if (typeof val === "number") { + while (++this.position < this.length && this.bytes[this.position] !== val) { + continue; + } + return; + } + + // val is an array + let found = false; + while (!found && this.position < this.length) { + while (++this.position < this.length && this.bytes[this.position] !== val[0]) { + continue; + } + found = true; + for (let i = 1; i < val.length; i++) { + if (this.position + i > this.length || this.bytes[this.position + i] !== val[i]) + found = false; + } + } + } + + /** + * Consume the next byte if it matches the supplied value. + * + * @param {number} val + */ + consumeIf(val) { + if (this.bytes[this.position] === val) { + this.position++; + this.bitPos = 0; + } + } + + /** + * Move forwards through the stream by the specified number of bytes. + * + * @param {number} numBytes + */ + moveForwardsBy(numBytes) { + const pos = this.position + numBytes; + if (pos < 0 || pos > this.length) + throw new Error("Cannot move to position " + pos + " in stream. Out of bounds."); + this.position = pos; + this.bitPos = 0; + } + + /** + * Move backwards through the stream by the specified number of bytes. + * + * @param {number} numBytes + */ + moveBackwardsBy(numBytes) { + const pos = this.position - numBytes; + if (pos < 0 || pos > this.length) + throw new Error("Cannot move to position " + pos + " in stream. Out of bounds."); + this.position = pos; + this.bitPos = 0; + } + + /** + * Move backwards through the strem by the specified number of bits. + * + * @param {number} numBits + */ + moveBackwardsByBits(numBits) { + if (numBits <= this.bitPos) { + this.bitPos -= numBits; + } else { + if (this.bitPos > 0) { + numBits -= this.bitPos; + this.bitPos = 0; + } + + while (numBits > 0) { + this.moveBackwardsBy(1); + this.bitPos = 8; + this.moveBackwardsByBits(numBits); + numBits -= 8; + } + } + } + + /** + * Move to a specified position in the stream. + * + * @param {number} pos + */ + moveTo(pos) { + if (pos < 0 || pos > this.length) + throw new Error("Cannot move to position " + pos + " in stream. Out of bounds."); + this.position = pos; + this.bitPos = 0; + } + + /** + * Returns true if there are more bytes left in the stream. + * + * @returns {boolean} + */ + hasMore() { + return this.position < this.length; + } + + /** + * Returns a slice of the stream up to the current position. + * + * @returns {Uint8Array} + */ + carve() { + if (this.bitPos > 0) this.position++; + return this.bytes.slice(0, this.position); + } + +} diff --git a/src/core/operations/DetectFileType.mjs b/src/core/operations/DetectFileType.mjs index 1d6897a0..2321cee8 100644 --- a/src/core/operations/DetectFileType.mjs +++ b/src/core/operations/DetectFileType.mjs @@ -5,7 +5,8 @@ */ import Operation from "../Operation"; -import Magic from "../lib/Magic"; +import {detectFileType} from "../lib/FileType"; +import {FILE_SIGNATURES} from "../lib/FileSignatures"; /** * Detect File Type operation @@ -24,7 +25,13 @@ class DetectFileType extends Operation { this.infoURL = "https://wikipedia.org/wiki/List_of_file_signatures"; this.inputType = "ArrayBuffer"; this.outputType = "string"; - this.args = []; + this.args = Object.keys(FILE_SIGNATURES).map(cat => { + return { + name: cat, + type: "boolean", + value: true + }; + }); } /** @@ -34,17 +41,27 @@ class DetectFileType extends Operation { */ run(input, args) { const data = new Uint8Array(input), - type = Magic.magicFileType(data); + categories = []; - if (!type) { + args.forEach((cat, i) => { + if (cat) categories.push(Object.keys(FILE_SIGNATURES)[i]); + }); + + const types = detectFileType(data, categories); + + if (!types.length) { return "Unknown file type. Have you tried checking the entropy of this data to determine whether it might be encrypted or compressed?"; } else { - let output = "File extension: " + type.ext + "\n" + - "MIME type: " + type.mime; + let output = ""; - if (type.desc && type.desc.length) { - output += "\nDescription: " + type.desc; - } + types.forEach(type => { + output += "File extension: " + type.extension + "\n" + + "MIME type: " + type.mime + "\n"; + + if (type.description && type.description.length) { + output += "\nDescription: " + type.description + "\n"; + } + }); return output; } diff --git a/src/core/operations/ExtractFiles.mjs b/src/core/operations/ExtractFiles.mjs new file mode 100644 index 00000000..b9b260bb --- /dev/null +++ b/src/core/operations/ExtractFiles.mjs @@ -0,0 +1,100 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import OperationError from "../errors/OperationError"; +import Utils from "../Utils"; +import {scanForFileTypes, extractFile} from "../lib/FileType"; +import {FILE_SIGNATURES} from "../lib/FileSignatures"; + +/** + * Extract Files operation + */ +class ExtractFiles extends Operation { + + /** + * ExtractFiles constructor + */ + constructor() { + super(); + + this.name = "Extract Files"; + this.module = "Default"; + this.description = "TODO"; + this.infoURL = "https://forensicswiki.org/wiki/File_Carving"; + this.inputType = "ArrayBuffer"; + this.outputType = "List"; + this.presentType = "html"; + this.args = Object.keys(FILE_SIGNATURES).map(cat => { + return { + name: cat, + type: "boolean", + value: cat === "Miscellaneous" ? false : true + }; + }).concat([ + { + name: "Ignore failed extractions", + type: "boolean", + value: "true" + } + ]); + } + + /** + * @param {ArrayBuffer} input + * @param {Object[]} args + * @returns {List} + */ + run(input, args) { + const bytes = new Uint8Array(input), + categories = [], + ignoreFailedExtractions = args.pop(1); + + args.forEach((cat, i) => { + if (cat) categories.push(Object.keys(FILE_SIGNATURES)[i]); + }); + + // Scan for embedded files + const detectedFiles = scanForFileTypes(bytes, categories); + + // Extract each file that we support + const files = []; + const errors = []; + detectedFiles.forEach(detectedFile => { + try { + files.push(extractFile(bytes, detectedFile.fileDetails, detectedFile.offset)); + } catch (err) { + if (!ignoreFailedExtractions && err.message.indexOf("No extraction algorithm available") < 0) { + errors.push( + `Error while attempting to extract ${detectedFile.fileDetails.name} ` + + `at offset ${detectedFile.offset}:\n` + + `${err.message}` + ); + } + } + }); + + if (errors.length) { + throw new OperationError(errors.join("\n\n")); + } + + return files; + } + + + /** + * Displays the files in HTML for web apps. + * + * @param {File[]} files + * @returns {html} + */ + async present(files) { + return await Utils.displayFilesAsHTML(files); + } + +} + +export default ExtractFiles; diff --git a/src/core/operations/GenerateQRCode.mjs b/src/core/operations/GenerateQRCode.mjs index edab6d40..ac7e5c5c 100644 --- a/src/core/operations/GenerateQRCode.mjs +++ b/src/core/operations/GenerateQRCode.mjs @@ -8,7 +8,7 @@ import Operation from "../Operation"; import OperationError from "../errors/OperationError"; import qr from "qr-image"; import { toBase64 } from "../lib/Base64"; -import Magic from "../lib/Magic"; +import { isImage } from "../lib/FileType"; import Utils from "../Utils"; /** @@ -100,9 +100,9 @@ class GenerateQRCode extends Operation { if (format === "PNG") { let dataURI = "data:"; - const type = Magic.magicFileType(data); - if (type && type.mime.indexOf("image") === 0){ - dataURI += type.mime + ";"; + const mime = isImage(data); + if (mime){ + dataURI += mime + ";"; } else { throw new OperationError("Invalid PNG file generated by QR image"); } diff --git a/src/core/operations/ParseQRCode.mjs b/src/core/operations/ParseQRCode.mjs index 75a24d55..ef7af6d7 100644 --- a/src/core/operations/ParseQRCode.mjs +++ b/src/core/operations/ParseQRCode.mjs @@ -6,7 +6,7 @@ import Operation from "../Operation"; import OperationError from "../errors/OperationError"; -import Magic from "../lib/Magic"; +import { isImage } from "../lib/FileType"; import jsqr from "jsqr"; import jimp from "jimp"; @@ -42,64 +42,61 @@ class ParseQRCode extends Operation { * @returns {string} */ async run(input, args) { - const type = Magic.magicFileType(input); const [normalise] = args; // Make sure that the input is an image - if (type && type.mime.indexOf("image") === 0) { - let image = input; + if (!isImage(input)) throw new OperationError("Invalid file type."); - if (normalise) { - // Process the image to be easier to read by jsqr - // Disables the alpha channel - // Sets the image default background to white - // Normalises the image colours - // Makes the image greyscale - // Converts image to a JPEG - image = await new Promise((resolve, reject) => { - jimp.read(Buffer.from(input)) - .then(image => { - image - .rgba(false) - .background(0xFFFFFFFF) - .normalize() - .greyscale() - .getBuffer(jimp.MIME_JPEG, (error, result) => { - resolve(result); - }); - }) - .catch(err => { - reject(new OperationError("Error reading the image file.")); - }); - }); - } + let image = input; - if (image instanceof OperationError) { - throw image; - } - - return new Promise((resolve, reject) => { - jimp.read(Buffer.from(image)) + if (normalise) { + // Process the image to be easier to read by jsqr + // Disables the alpha channel + // Sets the image default background to white + // Normalises the image colours + // Makes the image greyscale + // Converts image to a JPEG + image = await new Promise((resolve, reject) => { + jimp.read(Buffer.from(input)) .then(image => { - if (image.bitmap != null) { - const qrData = jsqr(image.bitmap.data, image.getWidth(), image.getHeight()); - if (qrData != null) { - resolve(qrData.data); - } else { - reject(new OperationError("Couldn't read a QR code from the image.")); - } - } else { - reject(new OperationError("Error reading the image file.")); - } + image + .rgba(false) + .background(0xFFFFFFFF) + .normalize() + .greyscale() + .getBuffer(jimp.MIME_JPEG, (error, result) => { + resolve(result); + }); }) .catch(err => { reject(new OperationError("Error reading the image file.")); }); }); - } else { - throw new OperationError("Invalid file type."); } + if (image instanceof OperationError) { + throw image; + } + + return new Promise((resolve, reject) => { + jimp.read(Buffer.from(image)) + .then(image => { + if (image.bitmap != null) { + const qrData = jsqr(image.bitmap.data, image.getWidth(), image.getHeight()); + if (qrData != null) { + resolve(qrData.data); + } else { + reject(new OperationError("Couldn't read a QR code from the image.")); + } + } else { + reject(new OperationError("Error reading the image file.")); + } + }) + .catch(err => { + reject(new OperationError("Error reading the image file.")); + }); + }); + } } diff --git a/src/core/operations/PlayMedia.mjs b/src/core/operations/PlayMedia.mjs index 81328a73..98b7d088 100644 --- a/src/core/operations/PlayMedia.mjs +++ b/src/core/operations/PlayMedia.mjs @@ -9,7 +9,7 @@ import { fromHex } from "../lib/Hex"; import Operation from "../Operation"; import OperationError from "../errors/OperationError"; import Utils from "../Utils"; -import Magic from "../lib/Magic"; +import { isType, detectFileType } from "../lib/FileType"; /** * PlayMedia operation @@ -66,8 +66,7 @@ class PlayMedia extends Operation { // Determine file type - const type = Magic.magicFileType(input); - if (!(type && /^audio|video/.test(type.mime))) { + if (!isType(/^(audio|video)/, input)) { throw new OperationError("Invalid or unrecognised file type"); } @@ -84,15 +83,15 @@ class PlayMedia extends Operation { async present(data) { if (!data.length) return ""; - const type = Magic.magicFileType(data); - const matches = /^audio|video/.exec(type.mime); + const types = detectFileType(data); + const matches = /^audio|video/.exec(types[0].mime); if (!matches) { throw new OperationError("Invalid file type"); } - const dataURI = `data:${type.mime};base64,${toBase64(data)}`; + const dataURI = `data:${types[0].mime};base64,${toBase64(data)}`; const element = matches[0]; - let html = `<${element} src='${dataURI}' type='${type.mime}' controls>`; + let html = `<${element} src='${dataURI}' type='${types[0].mime}' controls>`; html += "

Unsupported media type.

"; html += ``; return html; diff --git a/src/core/operations/RenderImage.mjs b/src/core/operations/RenderImage.mjs index 7edd2072..07866eaf 100644 --- a/src/core/operations/RenderImage.mjs +++ b/src/core/operations/RenderImage.mjs @@ -9,7 +9,7 @@ import { fromHex } from "../lib/Hex"; import Operation from "../Operation"; import OperationError from "../errors/OperationError"; import Utils from "../Utils"; -import Magic from "../lib/Magic"; +import {isImage} from "../lib/FileType"; /** * Render Image operation @@ -72,8 +72,7 @@ class RenderImage extends Operation { } // Determine file type - const type = Magic.magicFileType(input); - if (!(type && type.mime.indexOf("image") === 0)) { + if (!isImage(input)) { throw new OperationError("Invalid file type"); } @@ -92,9 +91,9 @@ class RenderImage extends Operation { let dataURI = "data:"; // Determine file type - const type = Magic.magicFileType(data); - if (type && type.mime.indexOf("image") === 0) { - dataURI += type.mime + ";"; + const mime = isImage(data); + if (mime) { + dataURI += mime + ";"; } else { throw new OperationError("Invalid file type"); } diff --git a/src/core/operations/ScanForEmbeddedFiles.mjs b/src/core/operations/ScanForEmbeddedFiles.mjs index a38c477f..ae88134f 100644 --- a/src/core/operations/ScanForEmbeddedFiles.mjs +++ b/src/core/operations/ScanForEmbeddedFiles.mjs @@ -6,7 +6,8 @@ import Operation from "../Operation"; import Utils from "../Utils"; -import Magic from "../lib/Magic"; +import {scanForFileTypes} from "../lib/FileType"; +import {FILE_SIGNATURES} from "../lib/FileSignatures"; /** * Scan for Embedded Files operation @@ -25,13 +26,13 @@ class ScanForEmbeddedFiles extends Operation { this.infoURL = "https://wikipedia.org/wiki/List_of_file_signatures"; this.inputType = "ArrayBuffer"; this.outputType = "string"; - this.args = [ - { - "name": "Ignore common byte sequences", - "type": "boolean", - "value": true - } - ]; + this.args = Object.keys(FILE_SIGNATURES).map(cat => { + return { + name: cat, + type: "boolean", + value: cat === "Miscellaneous" ? false : true + }; + }); } /** @@ -41,43 +42,33 @@ class ScanForEmbeddedFiles extends Operation { */ run(input, args) { let output = "Scanning data for 'magic bytes' which may indicate embedded files. The following results may be false positives and should not be treat as reliable. Any suffiently long file is likely to contain these magic bytes coincidentally.\n", - type, - numFound = 0, - numCommonFound = 0; - const ignoreCommon = args[0], - commonExts = ["ico", "ttf", ""], + numFound = 0; + const categories = [], data = new Uint8Array(input); - for (let i = 0; i < data.length; i++) { - type = Magic.magicFileType(data.slice(i)); - if (type) { - if (ignoreCommon && commonExts.indexOf(type.ext) > -1) { - numCommonFound++; - continue; - } - numFound++; - output += "\nOffset " + i + " (0x" + Utils.hex(i) + "):\n" + - " File extension: " + type.ext + "\n" + - " MIME type: " + type.mime + "\n"; + args.forEach((cat, i) => { + if (cat) categories.push(Object.keys(FILE_SIGNATURES)[i]); + }); - if (type.desc && type.desc.length) { - output += " Description: " + type.desc + "\n"; + const types = scanForFileTypes(data, categories); + + if (types.length) { + types.forEach(type => { + numFound++; + output += "\nOffset " + type.offset + " (0x" + Utils.hex(type.offset) + "):\n" + + " File extension: " + type.fileDetails.extension + "\n" + + " MIME type: " + type.fileDetails.mime + "\n"; + + if (type.fileDetails.description && type.fileDetails.description.length) { + output += " Description: " + type.fileDetails.description + "\n"; } - } + }); } if (numFound === 0) { output += "\nNo embedded files were found."; } - if (numCommonFound > 0) { - output += "\n\n" + numCommonFound; - output += numCommonFound === 1 ? - " file type was detected that has a common byte sequence. This is likely to be a false positive." : - " file types were detected that have common byte sequences. These are likely to be false positives."; - output += " Run this operation with the 'Ignore common byte sequences' option unchecked to see details."; - } - return output; } diff --git a/src/core/operations/SplitColourChannels.mjs b/src/core/operations/SplitColourChannels.mjs index f11ca14e..c38af409 100644 --- a/src/core/operations/SplitColourChannels.mjs +++ b/src/core/operations/SplitColourChannels.mjs @@ -7,7 +7,7 @@ import Operation from "../Operation"; import OperationError from "../errors/OperationError"; import Utils from "../Utils"; -import Magic from "../lib/Magic"; +import {isImage} from "../lib/FileType"; import jimp from "jimp"; @@ -38,56 +38,53 @@ class SplitColourChannels extends Operation { * @returns {List} */ async run(input, args) { - const type = Magic.magicFileType(input); // Make sure that the input is an image - if (type && type.mime.indexOf("image") === 0) { - const parsedImage = await jimp.read(Buffer.from(input)); + if (!isImage(input)) throw new OperationError("Invalid file type."); - const red = new Promise(async (resolve, reject) => { - try { - const split = parsedImage - .clone() - .color([ - {apply: "blue", params: [-255]}, - {apply: "green", params: [-255]} - ]) - .getBufferAsync(jimp.MIME_PNG); - resolve(new File([new Uint8Array((await split).values())], "red.png", {type: "image/png"})); - } catch (err) { - reject(new OperationError(`Could not split red channel: ${err}`)); - } - }); + const parsedImage = await jimp.read(Buffer.from(input)); - const green = new Promise(async (resolve, reject) => { - try { - const split = parsedImage.clone() - .color([ - {apply: "red", params: [-255]}, - {apply: "blue", params: [-255]}, - ]).getBufferAsync(jimp.MIME_PNG); - resolve(new File([new Uint8Array((await split).values())], "green.png", {type: "image/png"})); - } catch (err) { - reject(new OperationError(`Could not split green channel: ${err}`)); - } - }); + const red = new Promise(async (resolve, reject) => { + try { + const split = parsedImage + .clone() + .color([ + {apply: "blue", params: [-255]}, + {apply: "green", params: [-255]} + ]) + .getBufferAsync(jimp.MIME_PNG); + resolve(new File([new Uint8Array((await split).values())], "red.png", {type: "image/png"})); + } catch (err) { + reject(new OperationError(`Could not split red channel: ${err}`)); + } + }); - const blue = new Promise(async (resolve, reject) => { - try { - const split = parsedImage - .color([ - {apply: "red", params: [-255]}, - {apply: "green", params: [-255]}, - ]).getBufferAsync(jimp.MIME_PNG); - resolve(new File([new Uint8Array((await split).values())], "blue.png", {type: "image/png"})); - } catch (err) { - reject(new OperationError(`Could not split blue channel: ${err}`)); - } - }); + const green = new Promise(async (resolve, reject) => { + try { + const split = parsedImage.clone() + .color([ + {apply: "red", params: [-255]}, + {apply: "blue", params: [-255]}, + ]).getBufferAsync(jimp.MIME_PNG); + resolve(new File([new Uint8Array((await split).values())], "green.png", {type: "image/png"})); + } catch (err) { + reject(new OperationError(`Could not split green channel: ${err}`)); + } + }); - return await Promise.all([red, green, blue]); - } else { - throw new OperationError("Invalid file type."); - } + const blue = new Promise(async (resolve, reject) => { + try { + const split = parsedImage + .color([ + {apply: "red", params: [-255]}, + {apply: "green", params: [-255]}, + ]).getBufferAsync(jimp.MIME_PNG); + resolve(new File([new Uint8Array((await split).values())], "blue.png", {type: "image/png"})); + } catch (err) { + reject(new OperationError(`Could not split blue channel: ${err}`)); + } + }); + + return await Promise.all([red, green, blue]); } /** diff --git a/src/core/operations/Untar.mjs b/src/core/operations/Untar.mjs index af029184..8655ba68 100644 --- a/src/core/operations/Untar.mjs +++ b/src/core/operations/Untar.mjs @@ -6,6 +6,7 @@ import Operation from "../Operation"; import Utils from "../Utils"; +import Stream from "../lib/Stream"; /** * Untar operation @@ -41,38 +42,6 @@ class Untar extends Operation { * @returns {List} */ run(input, args) { - const Stream = function(input) { - this.bytes = input; - this.position = 0; - }; - - Stream.prototype.getBytes = function(bytesToGet) { - const newPosition = this.position + bytesToGet; - const bytes = this.bytes.slice(this.position, newPosition); - this.position = newPosition; - return bytes; - }; - - Stream.prototype.readString = function(numBytes) { - let result = ""; - for (let i = this.position; i < this.position + numBytes; i++) { - const currentByte = this.bytes[i]; - if (currentByte === 0) break; - result += String.fromCharCode(currentByte); - } - this.position += numBytes; - return result; - }; - - Stream.prototype.readInt = function(numBytes, base) { - const string = this.readString(numBytes); - return parseInt(string, base); - }; - - Stream.prototype.hasMore = function() { - return this.position < this.bytes.length; - }; - const stream = new Stream(input), files = []; @@ -85,7 +54,7 @@ class Untar extends Operation { ownerUID: stream.readString(8), ownerGID: stream.readString(8), size: parseInt(stream.readString(12), 8), // Octal - lastModTime: new Date(1000 * stream.readInt(12, 8)), // Octal + lastModTime: new Date(1000 * parseInt(stream.readString(12), 8)), // Octal checksum: stream.readString(8), type: stream.readString(1), linkedFileName: stream.readString(100), diff --git a/src/web/Manager.mjs b/src/web/Manager.mjs index 30cb4943..5fa0e8c1 100755 --- a/src/web/Manager.mjs +++ b/src/web/Manager.mjs @@ -173,6 +173,7 @@ class Manager { this.addDynamicListener("#output-file-download", "click", this.output.downloadFile, this.output); this.addDynamicListener("#output-file-slice i", "click", this.output.displayFileSlice, this.output); document.getElementById("show-file-overlay").addEventListener("click", this.output.showFileOverlayClick.bind(this.output)); + this.addDynamicListener(".extract-file,.extract-file i", "click", this.output.extractFileClick, this.output); // Options document.getElementById("options").addEventListener("click", this.options.optionsClick.bind(this.options)); diff --git a/src/web/OutputWaiter.mjs b/src/web/OutputWaiter.mjs index 2d93507c..0a10b8b2 100755 --- a/src/web/OutputWaiter.mjs +++ b/src/web/OutputWaiter.mjs @@ -494,6 +494,24 @@ class OutputWaiter { magicButton.setAttribute("data-original-title", "Magic!"); } + + /** + * Handler for extract file events. + * + * @param {Event} e + */ + async extractFileClick(e) { + e.preventDefault(); + e.stopPropagation(); + + const el = e.target.nodeName === "I" ? e.target.parentNode : e.target; + const blobURL = el.getAttribute("blob-url"); + const fileName = el.getAttribute("file-name"); + + const blob = await fetch(blobURL).then(r => r.blob()); + this.manager.input.loadFile(new File([blob], fileName, {type: blob.type})); + } + } export default OutputWaiter; diff --git a/src/web/html/index.html b/src/web/html/index.html index 74eb0ed8..302355d9 100755 --- a/src/web/html/index.html +++ b/src/web/html/index.html @@ -271,7 +271,7 @@ content_copy