From b3ee251ee3ac3122c0299ea923448bdbd50f28eb Mon Sep 17 00:00:00 2001 From: Matt C Date: Tue, 15 May 2018 16:30:17 +0100 Subject: [PATCH] ESM: Port Extract.js module --- src/core/lib/Extract.mjs | 41 +++ src/core/operations/ExtractDates.mjs | 52 +++ src/core/operations/ExtractDomains.mjs | 49 +++ src/core/operations/ExtractEmailAddresses.mjs | 49 +++ src/core/operations/ExtractFilePaths.mjs | 79 +++++ src/core/operations/ExtractIPAddresses.mjs | 94 +++++ src/core/operations/ExtractMACAddresses.mjs | 49 +++ src/core/operations/ExtractURLs.mjs | 55 +++ src/core/operations/Strings.mjs | 118 +++++++ src/core/operations/legacy/Extract.js | 333 ------------------ 10 files changed, 586 insertions(+), 333 deletions(-) create mode 100644 src/core/lib/Extract.mjs create mode 100644 src/core/operations/ExtractDates.mjs create mode 100644 src/core/operations/ExtractDomains.mjs create mode 100644 src/core/operations/ExtractEmailAddresses.mjs create mode 100644 src/core/operations/ExtractFilePaths.mjs create mode 100644 src/core/operations/ExtractIPAddresses.mjs create mode 100644 src/core/operations/ExtractMACAddresses.mjs create mode 100644 src/core/operations/ExtractURLs.mjs create mode 100644 src/core/operations/Strings.mjs delete mode 100755 src/core/operations/legacy/Extract.js diff --git a/src/core/lib/Extract.mjs b/src/core/lib/Extract.mjs new file mode 100644 index 00000000..ba57d758 --- /dev/null +++ b/src/core/lib/Extract.mjs @@ -0,0 +1,41 @@ +/** + * Identifier extraction functions + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + * + */ + +/** + * Runs search operations across the input data using regular expressions. + * + * @param {string} input + * @param {RegExp} searchRegex + * @param {RegExp} removeRegex - A regular expression defining results to remove from the + * final list + * @param {boolean} includeTotal - Whether or not to include the total number of results + * @returns {string} + */ +export function search (input, searchRegex, removeRegex, includeTotal) { + let output = "", + total = 0, + match; + + while ((match = searchRegex.exec(input))) { + // Moves pointer when an empty string is matched (prevents infinite loop) + if (match.index === searchRegex.lastIndex) { + searchRegex.lastIndex++; + } + + if (removeRegex && removeRegex.test(match[0])) + continue; + total++; + output += match[0] + "\n"; + } + + if (includeTotal) + output = "Total found: " + total + "\n\n" + output; + + return output; +} diff --git a/src/core/operations/ExtractDates.mjs b/src/core/operations/ExtractDates.mjs new file mode 100644 index 00000000..530db194 --- /dev/null +++ b/src/core/operations/ExtractDates.mjs @@ -0,0 +1,52 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import { search } from "../lib/Extract"; + +/** + * Extract dates operation + */ +class ExtractDates extends Operation { + + /** + * ExtractDates constructor + */ + constructor() { + super(); + + this.name = "Extract dates"; + this.module = "Regex"; + this.description = "Extracts dates in the following formatsDividers can be any of /, -, . or space"; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "Display total", + "type": "boolean", + "value": false + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const displayTotal = args[0], + date1 = "(?:19|20)\\d\\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])", // yyyy-mm-dd + date2 = "(?:0[1-9]|[12][0-9]|3[01])[- /.](?:0[1-9]|1[012])[- /.](?:19|20)\\d\\d", // dd/mm/yyyy + date3 = "(?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])[- /.](?:19|20)\\d\\d", // mm/dd/yyyy + regex = new RegExp(date1 + "|" + date2 + "|" + date3, "ig"); + + return search(input, regex, null, displayTotal); + } + +} + +export default ExtractDates; diff --git a/src/core/operations/ExtractDomains.mjs b/src/core/operations/ExtractDomains.mjs new file mode 100644 index 00000000..8eae8064 --- /dev/null +++ b/src/core/operations/ExtractDomains.mjs @@ -0,0 +1,49 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import { search } from "../lib/Extract"; + +/** + * Extract domains operation + */ +class ExtractDomains extends Operation { + + /** + * ExtractDomains constructor + */ + constructor() { + super(); + + this.name = "Extract domains"; + this.module = "Regex"; + this.description = "Extracts domain names.
Note that this will not include paths. Use Extract URLs to find entire URLs."; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "Display total", + "type": "boolean", + "value": "Extract.DISPLAY_TOTAL" + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const displayTotal = args[0], + regex = /\b((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,63}\b/ig; + + return search(input, regex, null, displayTotal); + } + +} + +export default ExtractDomains; diff --git a/src/core/operations/ExtractEmailAddresses.mjs b/src/core/operations/ExtractEmailAddresses.mjs new file mode 100644 index 00000000..6c2dc740 --- /dev/null +++ b/src/core/operations/ExtractEmailAddresses.mjs @@ -0,0 +1,49 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import { search } from "../lib/Extract"; + +/** + * Extract email addresses operation + */ +class ExtractEmailAddresses extends Operation { + + /** + * ExtractEmailAddresses constructor + */ + constructor() { + super(); + + this.name = "Extract email addresses"; + this.module = "Regex"; + this.description = "Extracts all email addresses from the input."; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "Display total", + "type": "boolean", + "value": false + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const displayTotal = args[0], + regex = /\b\w[-.\w]*@[-\w]+(?:\.[-\w]+)*\.[A-Z]{2,4}\b/ig; + + return search(input, regex, null, displayTotal); + } + +} + +export default ExtractEmailAddresses; diff --git a/src/core/operations/ExtractFilePaths.mjs b/src/core/operations/ExtractFilePaths.mjs new file mode 100644 index 00000000..11f10f72 --- /dev/null +++ b/src/core/operations/ExtractFilePaths.mjs @@ -0,0 +1,79 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import { search } from "../lib/Extract"; +/** + * Extract file paths operation + */ +class ExtractFilePaths extends Operation { + + /** + * ExtractFilePaths constructor + */ + constructor() { + super(); + + this.name = "Extract file paths"; + this.module = "Regex"; + this.description = "Extracts anything that looks like a Windows or UNIX file path.

Note that if UNIX is selected, there will likely be a lot of false positives."; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "Windows", + "type": "boolean", + "value": true + }, + { + "name": "UNIX", + "type": "boolean", + "value": true + }, + { + "name": "Display total", + "type": "boolean", + "value": false + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const includeWinPath = args[0], + includeUnixPath = args[1], + displayTotal = args[2], + winDrive = "[A-Z]:\\\\", + winName = "[A-Z\\d][A-Z\\d\\- '_\\(\\)~]{0,61}", + winExt = "[A-Z\\d]{1,6}", + winPath = winDrive + "(?:" + winName + "\\\\?)*" + winName + + "(?:\\." + winExt + ")?", + unixPath = "(?:/[A-Z\\d.][A-Z\\d\\-.]{0,61})+"; + let filePaths = ""; + + if (includeWinPath && includeUnixPath) { + filePaths = winPath + "|" + unixPath; + } else if (includeWinPath) { + filePaths = winPath; + } else if (includeUnixPath) { + filePaths = unixPath; + } + + if (filePaths) { + const regex = new RegExp(filePaths, "ig"); + return search(input, regex, null, displayTotal); + } else { + return ""; + } + } + +} + +export default ExtractFilePaths; diff --git a/src/core/operations/ExtractIPAddresses.mjs b/src/core/operations/ExtractIPAddresses.mjs new file mode 100644 index 00000000..b69d97d0 --- /dev/null +++ b/src/core/operations/ExtractIPAddresses.mjs @@ -0,0 +1,94 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import { search } from "../lib/Extract"; + +/** + * Extract IP addresses operation + */ +class ExtractIPAddresses extends Operation { + + /** + * ExtractIPAddresses constructor + */ + constructor() { + super(); + + this.name = "Extract IP addresses"; + this.module = "Regex"; + this.description = "Extracts all IPv4 and IPv6 addresses.

Warning: Given a string 710.65.0.456, this will match 10.65.0.45 so always check the original input!"; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "IPv4", + "type": "boolean", + "value": true + }, + { + "name": "IPv6", + "type": "boolean", + "value": false + }, + { + "name": "Remove local IPv4 addresses", + "type": "boolean", + "value": false + }, + { + "name": "Display total", + "type": "boolean", + "value": false + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const includeIpv4 = args[0], + includeIpv6 = args[1], + removeLocal = args[2], + displayTotal = args[3], + ipv4 = "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?", + ipv6 = "((?=.*::)(?!.*::.+::)(::)?([\\dA-F]{1,4}:(:|\\b)|){5}|([\\dA-F]{1,4}:){6})((([\\dA-F]{1,4}((?!\\3)::|:\\b|(?![\\dA-F])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})"; + let ips = ""; + + if (includeIpv4 && includeIpv6) { + ips = ipv4 + "|" + ipv6; + } else if (includeIpv4) { + ips = ipv4; + } else if (includeIpv6) { + ips = ipv6; + } + + if (ips) { + const regex = new RegExp(ips, "ig"); + + if (removeLocal) { + const ten = "10\\..+", + oneninetwo = "192\\.168\\..+", + oneseventwo = "172\\.(?:1[6-9]|2\\d|3[01])\\..+", + onetwoseven = "127\\..+", + removeRegex = new RegExp("^(?:" + ten + "|" + oneninetwo + + "|" + oneseventwo + "|" + onetwoseven + ")"); + + return search(input, regex, removeRegex, displayTotal); + } else { + return search(input, regex, null, displayTotal); + } + } else { + return ""; + } + } + +} + +export default ExtractIPAddresses; diff --git a/src/core/operations/ExtractMACAddresses.mjs b/src/core/operations/ExtractMACAddresses.mjs new file mode 100644 index 00000000..9c3c2a5b --- /dev/null +++ b/src/core/operations/ExtractMACAddresses.mjs @@ -0,0 +1,49 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import { search } from "../lib/Extract"; + +/** + * Extract MAC addresses operation + */ +class ExtractMACAddresses extends Operation { + + /** + * ExtractMACAddresses constructor + */ + constructor() { + super(); + + this.name = "Extract MAC addresses"; + this.module = "Regex"; + this.description = "Extracts all Media Access Control (MAC) addresses from the input."; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "Display total", + "type": "boolean", + "value": false + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const displayTotal = args[0], + regex = /[A-F\d]{2}(?:[:-][A-F\d]{2}){5}/ig; + + return search(input, regex, null, displayTotal); + } + +} + +export default ExtractMACAddresses; diff --git a/src/core/operations/ExtractURLs.mjs b/src/core/operations/ExtractURLs.mjs new file mode 100644 index 00000000..ab306d3f --- /dev/null +++ b/src/core/operations/ExtractURLs.mjs @@ -0,0 +1,55 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import { search } from "../lib/Extract"; + +/** + * Extract URLs operation + */ +class ExtractURLs extends Operation { + + /** + * ExtractURLs constructor + */ + constructor() { + super(); + + this.name = "Extract URLs"; + this.module = "Regex"; + this.description = "Extracts Uniform Resource Locators (URLs) from the input. The protocol (http, ftp etc.) is required otherwise there will be far too many false positives."; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "Display total", + "type": "boolean", + "value": false + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const displayTotal = args[0], + protocol = "[A-Z]+://", + hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+", + port = ":\\d+"; + let path = "/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*"; + + path += "(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*"; + const regex = new RegExp(protocol + hostname + "(?:" + port + + ")?(?:" + path + ")?", "ig"); + return search(input, regex, null, displayTotal); + } + +} + +export default ExtractURLs; diff --git a/src/core/operations/Strings.mjs b/src/core/operations/Strings.mjs new file mode 100644 index 00000000..a833f6dc --- /dev/null +++ b/src/core/operations/Strings.mjs @@ -0,0 +1,118 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import XRegExp from "xregexp"; +import { search } from "../lib/Extract"; +/** + * Strings operation + */ +class Strings extends Operation { + + /** + * Strings constructor + */ + constructor() { + super(); + + this.name = "Strings"; + this.module = "Regex"; + this.description = "Extracts all strings from the input."; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "Encoding", + "type": "option", + "value": ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"] + }, + { + "name": "Minimum length", + "type": "number", + "value": 4 + }, + { + "name": "Match", + "type": "option", + "value": [ + "[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)", + "[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)" + ] + }, + { + "name": "Display total", + "type": "boolean", + "value": false + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const encoding = args[0], + minLen = args[1], + matchType = args[2], + displayTotal = args[3], + alphanumeric = "A-Z\\d", + punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@", + printable = "\x20-\x7e", + uniAlphanumeric = "\\pL\\pN", + uniPunctuation = "\\pP\\pZ", + uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP"; + + let strings = ""; + + switch (matchType) { + case "Alphanumeric + punctuation (A)": + strings = `[${alphanumeric + punctuation}]`; + break; + case "All printable chars (A)": + case "Null-terminated strings (A)": + strings = `[${printable}]`; + break; + case "Alphanumeric + punctuation (U)": + strings = `[${uniAlphanumeric + uniPunctuation}]`; + break; + case "All printable chars (U)": + case "Null-terminated strings (U)": + strings = `[${uniPrintable}]`; + break; + } + + // UTF-16 support is hacked in by allowing null bytes on either side of the matched chars + switch (encoding) { + case "All": + strings = `(\x00?${strings}\x00?)`; + break; + case "16-bit littleendian": + strings = `(${strings}\x00)`; + break; + case "16-bit bigendian": + strings = `(\x00${strings})`; + break; + case "Single byte": + default: + break; + } + + strings = `${strings}{${minLen},}`; + + if (matchType.includes("Null-terminated")) { + strings += "\x00"; + } + + const regex = new XRegExp(strings, "ig"); + + return search(input, regex, null, displayTotal); + } + +} + +export default Strings; diff --git a/src/core/operations/legacy/Extract.js b/src/core/operations/legacy/Extract.js deleted file mode 100755 index 92c75a21..00000000 --- a/src/core/operations/legacy/Extract.js +++ /dev/null @@ -1,333 +0,0 @@ -import XRegExp from "xregexp"; - - -/** - * Identifier extraction operations. - * - * @author n1474335 [n1474335@gmail.com] - * @copyright Crown Copyright 2016 - * @license Apache-2.0 - * - * @namespace - */ -const Extract = { - - /** - * Runs search operations across the input data using regular expressions. - * - * @private - * @param {string} input - * @param {RegExp} searchRegex - * @param {RegExp} removeRegex - A regular expression defining results to remove from the - * final list - * @param {boolean} includeTotal - Whether or not to include the total number of results - * @returns {string} - */ - _search: function(input, searchRegex, removeRegex, includeTotal) { - let output = "", - total = 0, - match; - - while ((match = searchRegex.exec(input))) { - // Moves pointer when an empty string is matched (prevents infinite loop) - if (match.index === searchRegex.lastIndex) { - searchRegex.lastIndex++; - } - - if (removeRegex && removeRegex.test(match[0])) - continue; - total++; - output += match[0] + "\n"; - } - - if (includeTotal) - output = "Total found: " + total + "\n\n" + output; - - return output; - }, - - - /** - * @constant - * @default - */ - MIN_STRING_LEN: 4, - /** - * @constant - * @default - */ - STRING_MATCH_TYPE: [ - "[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)", - "[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)" - ], - /** - * @constant - * @default - */ - ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"], - /** - * @constant - * @default - */ - DISPLAY_TOTAL: false, - - /** - * Strings operation. - * - * @param {string} input - * @param {Object[]} args - * @returns {string} - */ - runStrings: function(input, args) { - const encoding = args[0], - minLen = args[1], - matchType = args[2], - displayTotal = args[3], - alphanumeric = "A-Z\\d", - punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@", - printable = "\x20-\x7e", - uniAlphanumeric = "\\pL\\pN", - uniPunctuation = "\\pP\\pZ", - uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP"; - - let strings = ""; - - switch (matchType) { - case "Alphanumeric + punctuation (A)": - strings = `[${alphanumeric + punctuation}]`; - break; - case "All printable chars (A)": - case "Null-terminated strings (A)": - strings = `[${printable}]`; - break; - case "Alphanumeric + punctuation (U)": - strings = `[${uniAlphanumeric + uniPunctuation}]`; - break; - case "All printable chars (U)": - case "Null-terminated strings (U)": - strings = `[${uniPrintable}]`; - break; - } - - // UTF-16 support is hacked in by allowing null bytes on either side of the matched chars - switch (encoding) { - case "All": - strings = `(\x00?${strings}\x00?)`; - break; - case "16-bit littleendian": - strings = `(${strings}\x00)`; - break; - case "16-bit bigendian": - strings = `(\x00${strings})`; - break; - case "Single byte": - default: - break; - } - - strings = `${strings}{${minLen},}`; - - if (matchType.includes("Null-terminated")) { - strings += "\x00"; - } - - const regex = new XRegExp(strings, "ig"); - - return Extract._search(input, regex, null, displayTotal); - }, - - - /** - * @constant - * @default - */ - INCLUDE_IPV4: true, - /** - * @constant - * @default - */ - INCLUDE_IPV6: false, - /** - * @constant - * @default - */ - REMOVE_LOCAL: false, - - /** - * Extract IP addresses operation. - * - * @param {string} input - * @param {Object[]} args - * @returns {string} - */ - runIp: function(input, args) { - let includeIpv4 = args[0], - includeIpv6 = args[1], - removeLocal = args[2], - displayTotal = args[3], - ipv4 = "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?", - ipv6 = "((?=.*::)(?!.*::.+::)(::)?([\\dA-F]{1,4}:(:|\\b)|){5}|([\\dA-F]{1,4}:){6})((([\\dA-F]{1,4}((?!\\3)::|:\\b|(?![\\dA-F])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})", - ips = ""; - - if (includeIpv4 && includeIpv6) { - ips = ipv4 + "|" + ipv6; - } else if (includeIpv4) { - ips = ipv4; - } else if (includeIpv6) { - ips = ipv6; - } - - if (ips) { - const regex = new RegExp(ips, "ig"); - - if (removeLocal) { - let ten = "10\\..+", - oneninetwo = "192\\.168\\..+", - oneseventwo = "172\\.(?:1[6-9]|2\\d|3[01])\\..+", - onetwoseven = "127\\..+", - removeRegex = new RegExp("^(?:" + ten + "|" + oneninetwo + - "|" + oneseventwo + "|" + onetwoseven + ")"); - - return Extract._search(input, regex, removeRegex, displayTotal); - } else { - return Extract._search(input, regex, null, displayTotal); - } - } else { - return ""; - } - }, - - - /** - * Extract email addresses operation. - * - * @param {string} input - * @param {Object[]} args - * @returns {string} - */ - runEmail: function(input, args) { - let displayTotal = args[0], - regex = /\b\w[-.\w]*@[-\w]+(?:\.[-\w]+)*\.[A-Z]{2,4}\b/ig; - - return Extract._search(input, regex, null, displayTotal); - }, - - - /** - * Extract MAC addresses operation. - * - * @param {string} input - * @param {Object[]} args - * @returns {string} - */ - runMac: function(input, args) { - let displayTotal = args[0], - regex = /[A-F\d]{2}(?:[:-][A-F\d]{2}){5}/ig; - - return Extract._search(input, regex, null, displayTotal); - }, - - - /** - * Extract URLs operation. - * - * @param {string} input - * @param {Object[]} args - * @returns {string} - */ - runUrls: function(input, args) { - let displayTotal = args[0], - protocol = "[A-Z]+://", - hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+", - port = ":\\d+", - path = "/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*"; - - path += "(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*"; - const regex = new RegExp(protocol + hostname + "(?:" + port + - ")?(?:" + path + ")?", "ig"); - return Extract._search(input, regex, null, displayTotal); - }, - - - /** - * Extract domains operation. - * - * @param {string} input - * @param {Object[]} args - * @returns {string} - */ - runDomains: function(input, args) { - const displayTotal = args[0], - regex = /\b((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,63}\b/ig; - - return Extract._search(input, regex, null, displayTotal); - }, - - - /** - * @constant - * @default - */ - INCLUDE_WIN_PATH: true, - /** - * @constant - * @default - */ - INCLUDE_UNIX_PATH: true, - - /** - * Extract file paths operation. - * - * @param {string} input - * @param {Object[]} args - * @returns {string} - */ - runFilePaths: function(input, args) { - let includeWinPath = args[0], - includeUnixPath = args[1], - displayTotal = args[2], - winDrive = "[A-Z]:\\\\", - winName = "[A-Z\\d][A-Z\\d\\- '_\\(\\)~]{0,61}", - winExt = "[A-Z\\d]{1,6}", - winPath = winDrive + "(?:" + winName + "\\\\?)*" + winName + - "(?:\\." + winExt + ")?", - unixPath = "(?:/[A-Z\\d.][A-Z\\d\\-.]{0,61})+", - filePaths = ""; - - if (includeWinPath && includeUnixPath) { - filePaths = winPath + "|" + unixPath; - } else if (includeWinPath) { - filePaths = winPath; - } else if (includeUnixPath) { - filePaths = unixPath; - } - - if (filePaths) { - const regex = new RegExp(filePaths, "ig"); - return Extract._search(input, regex, null, displayTotal); - } else { - return ""; - } - }, - - - /** - * Extract dates operation. - * - * @param {string} input - * @param {Object[]} args - * @returns {string} - */ - runDates: function(input, args) { - let displayTotal = args[0], - date1 = "(?:19|20)\\d\\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])", // yyyy-mm-dd - date2 = "(?:0[1-9]|[12][0-9]|3[01])[- /.](?:0[1-9]|1[012])[- /.](?:19|20)\\d\\d", // dd/mm/yyyy - date3 = "(?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])[- /.](?:19|20)\\d\\d", // mm/dd/yyyy - regex = new RegExp(date1 + "|" + date2 + "|" + date3, "ig"); - - return Extract._search(input, regex, null, displayTotal); - }, - -}; - -export default Extract;