mirror of
https://github.com/gchq/CyberChef.git
synced 2024-11-02 14:11:02 +01:00
ESM: Port Extract.js module
This commit is contained in:
parent
3c214ce17c
commit
b3ee251ee3
41
src/core/lib/Extract.mjs
Normal file
41
src/core/lib/Extract.mjs
Normal file
@ -0,0 +1,41 @@
|
||||
/**
|
||||
* Identifier extraction functions
|
||||
*
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* Runs search operations across the input data using regular expressions.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {RegExp} searchRegex
|
||||
* @param {RegExp} removeRegex - A regular expression defining results to remove from the
|
||||
* final list
|
||||
* @param {boolean} includeTotal - Whether or not to include the total number of results
|
||||
* @returns {string}
|
||||
*/
|
||||
export function search (input, searchRegex, removeRegex, includeTotal) {
|
||||
let output = "",
|
||||
total = 0,
|
||||
match;
|
||||
|
||||
while ((match = searchRegex.exec(input))) {
|
||||
// Moves pointer when an empty string is matched (prevents infinite loop)
|
||||
if (match.index === searchRegex.lastIndex) {
|
||||
searchRegex.lastIndex++;
|
||||
}
|
||||
|
||||
if (removeRegex && removeRegex.test(match[0]))
|
||||
continue;
|
||||
total++;
|
||||
output += match[0] + "\n";
|
||||
}
|
||||
|
||||
if (includeTotal)
|
||||
output = "Total found: " + total + "\n\n" + output;
|
||||
|
||||
return output;
|
||||
}
|
52
src/core/operations/ExtractDates.mjs
Normal file
52
src/core/operations/ExtractDates.mjs
Normal file
@ -0,0 +1,52 @@
|
||||
/**
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation";
|
||||
import { search } from "../lib/Extract";
|
||||
|
||||
/**
|
||||
* Extract dates operation
|
||||
*/
|
||||
class ExtractDates extends Operation {
|
||||
|
||||
/**
|
||||
* ExtractDates constructor
|
||||
*/
|
||||
constructor() {
|
||||
super();
|
||||
|
||||
this.name = "Extract dates";
|
||||
this.module = "Regex";
|
||||
this.description = "Extracts dates in the following formats<ul><li><code>yyyy-mm-dd</code></li><li><code>dd/mm/yyyy</code></li><li><code>mm/dd/yyyy</code></li></ul>Dividers can be any of /, -, . or space";
|
||||
this.inputType = "string";
|
||||
this.outputType = "string";
|
||||
this.args = [
|
||||
{
|
||||
"name": "Display total",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
run(input, args) {
|
||||
const displayTotal = args[0],
|
||||
date1 = "(?:19|20)\\d\\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])", // yyyy-mm-dd
|
||||
date2 = "(?:0[1-9]|[12][0-9]|3[01])[- /.](?:0[1-9]|1[012])[- /.](?:19|20)\\d\\d", // dd/mm/yyyy
|
||||
date3 = "(?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])[- /.](?:19|20)\\d\\d", // mm/dd/yyyy
|
||||
regex = new RegExp(date1 + "|" + date2 + "|" + date3, "ig");
|
||||
|
||||
return search(input, regex, null, displayTotal);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export default ExtractDates;
|
49
src/core/operations/ExtractDomains.mjs
Normal file
49
src/core/operations/ExtractDomains.mjs
Normal file
@ -0,0 +1,49 @@
|
||||
/**
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation";
|
||||
import { search } from "../lib/Extract";
|
||||
|
||||
/**
|
||||
* Extract domains operation
|
||||
*/
|
||||
class ExtractDomains extends Operation {
|
||||
|
||||
/**
|
||||
* ExtractDomains constructor
|
||||
*/
|
||||
constructor() {
|
||||
super();
|
||||
|
||||
this.name = "Extract domains";
|
||||
this.module = "Regex";
|
||||
this.description = "Extracts domain names.<br>Note that this will not include paths. Use <strong>Extract URLs</strong> to find entire URLs.";
|
||||
this.inputType = "string";
|
||||
this.outputType = "string";
|
||||
this.args = [
|
||||
{
|
||||
"name": "Display total",
|
||||
"type": "boolean",
|
||||
"value": "Extract.DISPLAY_TOTAL"
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
run(input, args) {
|
||||
const displayTotal = args[0],
|
||||
regex = /\b((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,63}\b/ig;
|
||||
|
||||
return search(input, regex, null, displayTotal);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export default ExtractDomains;
|
49
src/core/operations/ExtractEmailAddresses.mjs
Normal file
49
src/core/operations/ExtractEmailAddresses.mjs
Normal file
@ -0,0 +1,49 @@
|
||||
/**
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation";
|
||||
import { search } from "../lib/Extract";
|
||||
|
||||
/**
|
||||
* Extract email addresses operation
|
||||
*/
|
||||
class ExtractEmailAddresses extends Operation {
|
||||
|
||||
/**
|
||||
* ExtractEmailAddresses constructor
|
||||
*/
|
||||
constructor() {
|
||||
super();
|
||||
|
||||
this.name = "Extract email addresses";
|
||||
this.module = "Regex";
|
||||
this.description = "Extracts all email addresses from the input.";
|
||||
this.inputType = "string";
|
||||
this.outputType = "string";
|
||||
this.args = [
|
||||
{
|
||||
"name": "Display total",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
run(input, args) {
|
||||
const displayTotal = args[0],
|
||||
regex = /\b\w[-.\w]*@[-\w]+(?:\.[-\w]+)*\.[A-Z]{2,4}\b/ig;
|
||||
|
||||
return search(input, regex, null, displayTotal);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export default ExtractEmailAddresses;
|
79
src/core/operations/ExtractFilePaths.mjs
Normal file
79
src/core/operations/ExtractFilePaths.mjs
Normal file
@ -0,0 +1,79 @@
|
||||
/**
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation";
|
||||
import { search } from "../lib/Extract";
|
||||
/**
|
||||
* Extract file paths operation
|
||||
*/
|
||||
class ExtractFilePaths extends Operation {
|
||||
|
||||
/**
|
||||
* ExtractFilePaths constructor
|
||||
*/
|
||||
constructor() {
|
||||
super();
|
||||
|
||||
this.name = "Extract file paths";
|
||||
this.module = "Regex";
|
||||
this.description = "Extracts anything that looks like a Windows or UNIX file path.<br><br>Note that if UNIX is selected, there will likely be a lot of false positives.";
|
||||
this.inputType = "string";
|
||||
this.outputType = "string";
|
||||
this.args = [
|
||||
{
|
||||
"name": "Windows",
|
||||
"type": "boolean",
|
||||
"value": true
|
||||
},
|
||||
{
|
||||
"name": "UNIX",
|
||||
"type": "boolean",
|
||||
"value": true
|
||||
},
|
||||
{
|
||||
"name": "Display total",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
run(input, args) {
|
||||
const includeWinPath = args[0],
|
||||
includeUnixPath = args[1],
|
||||
displayTotal = args[2],
|
||||
winDrive = "[A-Z]:\\\\",
|
||||
winName = "[A-Z\\d][A-Z\\d\\- '_\\(\\)~]{0,61}",
|
||||
winExt = "[A-Z\\d]{1,6}",
|
||||
winPath = winDrive + "(?:" + winName + "\\\\?)*" + winName +
|
||||
"(?:\\." + winExt + ")?",
|
||||
unixPath = "(?:/[A-Z\\d.][A-Z\\d\\-.]{0,61})+";
|
||||
let filePaths = "";
|
||||
|
||||
if (includeWinPath && includeUnixPath) {
|
||||
filePaths = winPath + "|" + unixPath;
|
||||
} else if (includeWinPath) {
|
||||
filePaths = winPath;
|
||||
} else if (includeUnixPath) {
|
||||
filePaths = unixPath;
|
||||
}
|
||||
|
||||
if (filePaths) {
|
||||
const regex = new RegExp(filePaths, "ig");
|
||||
return search(input, regex, null, displayTotal);
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export default ExtractFilePaths;
|
94
src/core/operations/ExtractIPAddresses.mjs
Normal file
94
src/core/operations/ExtractIPAddresses.mjs
Normal file
@ -0,0 +1,94 @@
|
||||
/**
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation";
|
||||
import { search } from "../lib/Extract";
|
||||
|
||||
/**
|
||||
* Extract IP addresses operation
|
||||
*/
|
||||
class ExtractIPAddresses extends Operation {
|
||||
|
||||
/**
|
||||
* ExtractIPAddresses constructor
|
||||
*/
|
||||
constructor() {
|
||||
super();
|
||||
|
||||
this.name = "Extract IP addresses";
|
||||
this.module = "Regex";
|
||||
this.description = "Extracts all IPv4 and IPv6 addresses.<br><br>Warning: Given a string <code>710.65.0.456</code>, this will match <code>10.65.0.45</code> so always check the original input!";
|
||||
this.inputType = "string";
|
||||
this.outputType = "string";
|
||||
this.args = [
|
||||
{
|
||||
"name": "IPv4",
|
||||
"type": "boolean",
|
||||
"value": true
|
||||
},
|
||||
{
|
||||
"name": "IPv6",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
},
|
||||
{
|
||||
"name": "Remove local IPv4 addresses",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
},
|
||||
{
|
||||
"name": "Display total",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
run(input, args) {
|
||||
const includeIpv4 = args[0],
|
||||
includeIpv6 = args[1],
|
||||
removeLocal = args[2],
|
||||
displayTotal = args[3],
|
||||
ipv4 = "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?",
|
||||
ipv6 = "((?=.*::)(?!.*::.+::)(::)?([\\dA-F]{1,4}:(:|\\b)|){5}|([\\dA-F]{1,4}:){6})((([\\dA-F]{1,4}((?!\\3)::|:\\b|(?![\\dA-F])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})";
|
||||
let ips = "";
|
||||
|
||||
if (includeIpv4 && includeIpv6) {
|
||||
ips = ipv4 + "|" + ipv6;
|
||||
} else if (includeIpv4) {
|
||||
ips = ipv4;
|
||||
} else if (includeIpv6) {
|
||||
ips = ipv6;
|
||||
}
|
||||
|
||||
if (ips) {
|
||||
const regex = new RegExp(ips, "ig");
|
||||
|
||||
if (removeLocal) {
|
||||
const ten = "10\\..+",
|
||||
oneninetwo = "192\\.168\\..+",
|
||||
oneseventwo = "172\\.(?:1[6-9]|2\\d|3[01])\\..+",
|
||||
onetwoseven = "127\\..+",
|
||||
removeRegex = new RegExp("^(?:" + ten + "|" + oneninetwo +
|
||||
"|" + oneseventwo + "|" + onetwoseven + ")");
|
||||
|
||||
return search(input, regex, removeRegex, displayTotal);
|
||||
} else {
|
||||
return search(input, regex, null, displayTotal);
|
||||
}
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export default ExtractIPAddresses;
|
49
src/core/operations/ExtractMACAddresses.mjs
Normal file
49
src/core/operations/ExtractMACAddresses.mjs
Normal file
@ -0,0 +1,49 @@
|
||||
/**
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation";
|
||||
import { search } from "../lib/Extract";
|
||||
|
||||
/**
|
||||
* Extract MAC addresses operation
|
||||
*/
|
||||
class ExtractMACAddresses extends Operation {
|
||||
|
||||
/**
|
||||
* ExtractMACAddresses constructor
|
||||
*/
|
||||
constructor() {
|
||||
super();
|
||||
|
||||
this.name = "Extract MAC addresses";
|
||||
this.module = "Regex";
|
||||
this.description = "Extracts all Media Access Control (MAC) addresses from the input.";
|
||||
this.inputType = "string";
|
||||
this.outputType = "string";
|
||||
this.args = [
|
||||
{
|
||||
"name": "Display total",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
run(input, args) {
|
||||
const displayTotal = args[0],
|
||||
regex = /[A-F\d]{2}(?:[:-][A-F\d]{2}){5}/ig;
|
||||
|
||||
return search(input, regex, null, displayTotal);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export default ExtractMACAddresses;
|
55
src/core/operations/ExtractURLs.mjs
Normal file
55
src/core/operations/ExtractURLs.mjs
Normal file
@ -0,0 +1,55 @@
|
||||
/**
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation";
|
||||
import { search } from "../lib/Extract";
|
||||
|
||||
/**
|
||||
* Extract URLs operation
|
||||
*/
|
||||
class ExtractURLs extends Operation {
|
||||
|
||||
/**
|
||||
* ExtractURLs constructor
|
||||
*/
|
||||
constructor() {
|
||||
super();
|
||||
|
||||
this.name = "Extract URLs";
|
||||
this.module = "Regex";
|
||||
this.description = "Extracts Uniform Resource Locators (URLs) from the input. The protocol (http, ftp etc.) is required otherwise there will be far too many false positives.";
|
||||
this.inputType = "string";
|
||||
this.outputType = "string";
|
||||
this.args = [
|
||||
{
|
||||
"name": "Display total",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
run(input, args) {
|
||||
const displayTotal = args[0],
|
||||
protocol = "[A-Z]+://",
|
||||
hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+",
|
||||
port = ":\\d+";
|
||||
let path = "/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*";
|
||||
|
||||
path += "(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*";
|
||||
const regex = new RegExp(protocol + hostname + "(?:" + port +
|
||||
")?(?:" + path + ")?", "ig");
|
||||
return search(input, regex, null, displayTotal);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export default ExtractURLs;
|
118
src/core/operations/Strings.mjs
Normal file
118
src/core/operations/Strings.mjs
Normal file
@ -0,0 +1,118 @@
|
||||
/**
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation";
|
||||
import XRegExp from "xregexp";
|
||||
import { search } from "../lib/Extract";
|
||||
/**
|
||||
* Strings operation
|
||||
*/
|
||||
class Strings extends Operation {
|
||||
|
||||
/**
|
||||
* Strings constructor
|
||||
*/
|
||||
constructor() {
|
||||
super();
|
||||
|
||||
this.name = "Strings";
|
||||
this.module = "Regex";
|
||||
this.description = "Extracts all strings from the input.";
|
||||
this.inputType = "string";
|
||||
this.outputType = "string";
|
||||
this.args = [
|
||||
{
|
||||
"name": "Encoding",
|
||||
"type": "option",
|
||||
"value": ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"]
|
||||
},
|
||||
{
|
||||
"name": "Minimum length",
|
||||
"type": "number",
|
||||
"value": 4
|
||||
},
|
||||
{
|
||||
"name": "Match",
|
||||
"type": "option",
|
||||
"value": [
|
||||
"[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)",
|
||||
"[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Display total",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
run(input, args) {
|
||||
const encoding = args[0],
|
||||
minLen = args[1],
|
||||
matchType = args[2],
|
||||
displayTotal = args[3],
|
||||
alphanumeric = "A-Z\\d",
|
||||
punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@",
|
||||
printable = "\x20-\x7e",
|
||||
uniAlphanumeric = "\\pL\\pN",
|
||||
uniPunctuation = "\\pP\\pZ",
|
||||
uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP";
|
||||
|
||||
let strings = "";
|
||||
|
||||
switch (matchType) {
|
||||
case "Alphanumeric + punctuation (A)":
|
||||
strings = `[${alphanumeric + punctuation}]`;
|
||||
break;
|
||||
case "All printable chars (A)":
|
||||
case "Null-terminated strings (A)":
|
||||
strings = `[${printable}]`;
|
||||
break;
|
||||
case "Alphanumeric + punctuation (U)":
|
||||
strings = `[${uniAlphanumeric + uniPunctuation}]`;
|
||||
break;
|
||||
case "All printable chars (U)":
|
||||
case "Null-terminated strings (U)":
|
||||
strings = `[${uniPrintable}]`;
|
||||
break;
|
||||
}
|
||||
|
||||
// UTF-16 support is hacked in by allowing null bytes on either side of the matched chars
|
||||
switch (encoding) {
|
||||
case "All":
|
||||
strings = `(\x00?${strings}\x00?)`;
|
||||
break;
|
||||
case "16-bit littleendian":
|
||||
strings = `(${strings}\x00)`;
|
||||
break;
|
||||
case "16-bit bigendian":
|
||||
strings = `(\x00${strings})`;
|
||||
break;
|
||||
case "Single byte":
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
strings = `${strings}{${minLen},}`;
|
||||
|
||||
if (matchType.includes("Null-terminated")) {
|
||||
strings += "\x00";
|
||||
}
|
||||
|
||||
const regex = new XRegExp(strings, "ig");
|
||||
|
||||
return search(input, regex, null, displayTotal);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export default Strings;
|
@ -1,333 +0,0 @@
|
||||
import XRegExp from "xregexp";
|
||||
|
||||
|
||||
/**
|
||||
* Identifier extraction operations.
|
||||
*
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*
|
||||
* @namespace
|
||||
*/
|
||||
const Extract = {
|
||||
|
||||
/**
|
||||
* Runs search operations across the input data using regular expressions.
|
||||
*
|
||||
* @private
|
||||
* @param {string} input
|
||||
* @param {RegExp} searchRegex
|
||||
* @param {RegExp} removeRegex - A regular expression defining results to remove from the
|
||||
* final list
|
||||
* @param {boolean} includeTotal - Whether or not to include the total number of results
|
||||
* @returns {string}
|
||||
*/
|
||||
_search: function(input, searchRegex, removeRegex, includeTotal) {
|
||||
let output = "",
|
||||
total = 0,
|
||||
match;
|
||||
|
||||
while ((match = searchRegex.exec(input))) {
|
||||
// Moves pointer when an empty string is matched (prevents infinite loop)
|
||||
if (match.index === searchRegex.lastIndex) {
|
||||
searchRegex.lastIndex++;
|
||||
}
|
||||
|
||||
if (removeRegex && removeRegex.test(match[0]))
|
||||
continue;
|
||||
total++;
|
||||
output += match[0] + "\n";
|
||||
}
|
||||
|
||||
if (includeTotal)
|
||||
output = "Total found: " + total + "\n\n" + output;
|
||||
|
||||
return output;
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
MIN_STRING_LEN: 4,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
STRING_MATCH_TYPE: [
|
||||
"[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)",
|
||||
"[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)"
|
||||
],
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"],
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
DISPLAY_TOTAL: false,
|
||||
|
||||
/**
|
||||
* Strings operation.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
runStrings: function(input, args) {
|
||||
const encoding = args[0],
|
||||
minLen = args[1],
|
||||
matchType = args[2],
|
||||
displayTotal = args[3],
|
||||
alphanumeric = "A-Z\\d",
|
||||
punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@",
|
||||
printable = "\x20-\x7e",
|
||||
uniAlphanumeric = "\\pL\\pN",
|
||||
uniPunctuation = "\\pP\\pZ",
|
||||
uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP";
|
||||
|
||||
let strings = "";
|
||||
|
||||
switch (matchType) {
|
||||
case "Alphanumeric + punctuation (A)":
|
||||
strings = `[${alphanumeric + punctuation}]`;
|
||||
break;
|
||||
case "All printable chars (A)":
|
||||
case "Null-terminated strings (A)":
|
||||
strings = `[${printable}]`;
|
||||
break;
|
||||
case "Alphanumeric + punctuation (U)":
|
||||
strings = `[${uniAlphanumeric + uniPunctuation}]`;
|
||||
break;
|
||||
case "All printable chars (U)":
|
||||
case "Null-terminated strings (U)":
|
||||
strings = `[${uniPrintable}]`;
|
||||
break;
|
||||
}
|
||||
|
||||
// UTF-16 support is hacked in by allowing null bytes on either side of the matched chars
|
||||
switch (encoding) {
|
||||
case "All":
|
||||
strings = `(\x00?${strings}\x00?)`;
|
||||
break;
|
||||
case "16-bit littleendian":
|
||||
strings = `(${strings}\x00)`;
|
||||
break;
|
||||
case "16-bit bigendian":
|
||||
strings = `(\x00${strings})`;
|
||||
break;
|
||||
case "Single byte":
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
strings = `${strings}{${minLen},}`;
|
||||
|
||||
if (matchType.includes("Null-terminated")) {
|
||||
strings += "\x00";
|
||||
}
|
||||
|
||||
const regex = new XRegExp(strings, "ig");
|
||||
|
||||
return Extract._search(input, regex, null, displayTotal);
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
INCLUDE_IPV4: true,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
INCLUDE_IPV6: false,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
REMOVE_LOCAL: false,
|
||||
|
||||
/**
|
||||
* Extract IP addresses operation.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
runIp: function(input, args) {
|
||||
let includeIpv4 = args[0],
|
||||
includeIpv6 = args[1],
|
||||
removeLocal = args[2],
|
||||
displayTotal = args[3],
|
||||
ipv4 = "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?",
|
||||
ipv6 = "((?=.*::)(?!.*::.+::)(::)?([\\dA-F]{1,4}:(:|\\b)|){5}|([\\dA-F]{1,4}:){6})((([\\dA-F]{1,4}((?!\\3)::|:\\b|(?![\\dA-F])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})",
|
||||
ips = "";
|
||||
|
||||
if (includeIpv4 && includeIpv6) {
|
||||
ips = ipv4 + "|" + ipv6;
|
||||
} else if (includeIpv4) {
|
||||
ips = ipv4;
|
||||
} else if (includeIpv6) {
|
||||
ips = ipv6;
|
||||
}
|
||||
|
||||
if (ips) {
|
||||
const regex = new RegExp(ips, "ig");
|
||||
|
||||
if (removeLocal) {
|
||||
let ten = "10\\..+",
|
||||
oneninetwo = "192\\.168\\..+",
|
||||
oneseventwo = "172\\.(?:1[6-9]|2\\d|3[01])\\..+",
|
||||
onetwoseven = "127\\..+",
|
||||
removeRegex = new RegExp("^(?:" + ten + "|" + oneninetwo +
|
||||
"|" + oneseventwo + "|" + onetwoseven + ")");
|
||||
|
||||
return Extract._search(input, regex, removeRegex, displayTotal);
|
||||
} else {
|
||||
return Extract._search(input, regex, null, displayTotal);
|
||||
}
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* Extract email addresses operation.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
runEmail: function(input, args) {
|
||||
let displayTotal = args[0],
|
||||
regex = /\b\w[-.\w]*@[-\w]+(?:\.[-\w]+)*\.[A-Z]{2,4}\b/ig;
|
||||
|
||||
return Extract._search(input, regex, null, displayTotal);
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* Extract MAC addresses operation.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
runMac: function(input, args) {
|
||||
let displayTotal = args[0],
|
||||
regex = /[A-F\d]{2}(?:[:-][A-F\d]{2}){5}/ig;
|
||||
|
||||
return Extract._search(input, regex, null, displayTotal);
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* Extract URLs operation.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
runUrls: function(input, args) {
|
||||
let displayTotal = args[0],
|
||||
protocol = "[A-Z]+://",
|
||||
hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+",
|
||||
port = ":\\d+",
|
||||
path = "/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*";
|
||||
|
||||
path += "(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*";
|
||||
const regex = new RegExp(protocol + hostname + "(?:" + port +
|
||||
")?(?:" + path + ")?", "ig");
|
||||
return Extract._search(input, regex, null, displayTotal);
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* Extract domains operation.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
runDomains: function(input, args) {
|
||||
const displayTotal = args[0],
|
||||
regex = /\b((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,63}\b/ig;
|
||||
|
||||
return Extract._search(input, regex, null, displayTotal);
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
INCLUDE_WIN_PATH: true,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
INCLUDE_UNIX_PATH: true,
|
||||
|
||||
/**
|
||||
* Extract file paths operation.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
runFilePaths: function(input, args) {
|
||||
let includeWinPath = args[0],
|
||||
includeUnixPath = args[1],
|
||||
displayTotal = args[2],
|
||||
winDrive = "[A-Z]:\\\\",
|
||||
winName = "[A-Z\\d][A-Z\\d\\- '_\\(\\)~]{0,61}",
|
||||
winExt = "[A-Z\\d]{1,6}",
|
||||
winPath = winDrive + "(?:" + winName + "\\\\?)*" + winName +
|
||||
"(?:\\." + winExt + ")?",
|
||||
unixPath = "(?:/[A-Z\\d.][A-Z\\d\\-.]{0,61})+",
|
||||
filePaths = "";
|
||||
|
||||
if (includeWinPath && includeUnixPath) {
|
||||
filePaths = winPath + "|" + unixPath;
|
||||
} else if (includeWinPath) {
|
||||
filePaths = winPath;
|
||||
} else if (includeUnixPath) {
|
||||
filePaths = unixPath;
|
||||
}
|
||||
|
||||
if (filePaths) {
|
||||
const regex = new RegExp(filePaths, "ig");
|
||||
return Extract._search(input, regex, null, displayTotal);
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* Extract dates operation.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
runDates: function(input, args) {
|
||||
let displayTotal = args[0],
|
||||
date1 = "(?:19|20)\\d\\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])", // yyyy-mm-dd
|
||||
date2 = "(?:0[1-9]|[12][0-9]|3[01])[- /.](?:0[1-9]|1[012])[- /.](?:19|20)\\d\\d", // dd/mm/yyyy
|
||||
date3 = "(?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])[- /.](?:19|20)\\d\\d", // mm/dd/yyyy
|
||||
regex = new RegExp(date1 + "|" + date2 + "|" + date3, "ig");
|
||||
|
||||
return Extract._search(input, regex, null, displayTotal);
|
||||
},
|
||||
|
||||
};
|
||||
|
||||
export default Extract;
|
Loading…
Reference in New Issue
Block a user