From 56551712d6b373883d80fdfdebde6d9e18c453e8 Mon Sep 17 00:00:00 2001 From: n1474335 Date: Wed, 3 Jan 2018 16:51:10 +0000 Subject: [PATCH 1/5] Began implementing UTF-16 support in the 'Strings' operation. --- package-lock.json | 5 + package.json | 1 + src/core/config/OperationConfig.js | 44 +++-- src/core/config/modules/Default.js | 11 -- src/core/config/modules/OpModules.js | 2 + src/core/config/modules/Regex.js | 30 +++ src/core/operations/Extract.js | 31 ++- src/core/operations/Regex.js | 272 +++++++++++++++++++++++++++ src/core/operations/StrUtils.js | 256 ------------------------- 9 files changed, 363 insertions(+), 289 deletions(-) create mode 100644 src/core/config/modules/Regex.js create mode 100644 src/core/operations/Regex.js diff --git a/package-lock.json b/package-lock.json index afc07887..d90679f7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10251,6 +10251,11 @@ "resolved": "https://registry.npmjs.org/xpath/-/xpath-0.0.27.tgz", "integrity": "sha512-fg03WRxtkCV6ohClePNAECYsmpKKTv5L8y/X3Dn1hQrec3POx2jHZ/0P2qQ6HvsrU1BmeqXcof3NGGueG6LxwQ==" }, + "xregexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/xregexp/-/xregexp-4.0.0.tgz", + "integrity": "sha512-PHyM+sQouu7xspQQwELlGwwd05mXUFqwFYfqPO0cC7x4fxyHnnuetmQr6CjJiafIDoH4MogHb9dOoJzR/Y4rFg==" + }, "xtend": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.1.tgz", diff --git a/package.json b/package.json index fe524dfa..d4092f2b 100644 --- a/package.json +++ b/package.json @@ -102,6 +102,7 @@ "vkbeautify": "^0.99.3", "xmldom": "^0.1.27", "xpath": "0.0.27", + "xregexp": "^4.0.0", "zlibjs": "^0.3.1" }, "scripts": { diff --git a/src/core/config/OperationConfig.js b/src/core/config/OperationConfig.js index 43e06c31..50091bf9 100755 --- a/src/core/config/OperationConfig.js +++ b/src/core/config/OperationConfig.js @@ -30,6 +30,7 @@ import NetBIOS from "../operations/NetBIOS.js"; import PHP from "../operations/PHP.js"; import PublicKey from 
"../operations/PublicKey.js"; import Punycode from "../operations/Punycode.js"; +import Regex from "../operations/Regex.js"; import Rotate from "../operations/Rotate.js"; import SeqUtils from "../operations/SeqUtils.js"; import Shellcode from "../operations/Shellcode.js"; @@ -2058,7 +2059,7 @@ const OperationConfig = { args: [] }, "Find / Replace": { - module: "Default", + module: "Regex", description: "Replaces all occurrences of the first string with the second.

Includes support for regular expressions (regex), simple strings and extended strings (which support \\n, \\r, \\t, \\b, \\f and escaped hex bytes using \\x notation, e.g. \\x00 for a null byte).", manualBake: true, inputType: "string", @@ -2068,7 +2069,7 @@ const OperationConfig = { name: "Find", type: "toggleString", value: "", - toggleValues: StrUtils.SEARCH_TYPE + toggleValues: Regex.SEARCH_TYPE }, { name: "Replace", @@ -2078,17 +2079,17 @@ const OperationConfig = { { name: "Global match", type: "boolean", - value: StrUtils.FIND_REPLACE_GLOBAL, + value: Regex.FIND_REPLACE_GLOBAL, }, { name: "Case insensitive", type: "boolean", - value: StrUtils.FIND_REPLACE_CASE, + value: Regex.FIND_REPLACE_CASE, }, { name: "Multiline matching", type: "boolean", - value: StrUtils.FIND_REPLACE_MULTILINE, + value: Regex.FIND_REPLACE_MULTILINE, }, ] @@ -2160,7 +2161,7 @@ const OperationConfig = { ] }, "Strings": { - module: "Default", + module: "Regex", description: "Extracts all strings from the input.", inputType: "string", outputType: "string", @@ -2174,11 +2175,16 @@ const OperationConfig = { name: "Display total", type: "boolean", value: Extract.DISPLAY_TOTAL + }, + { + name: "Encoding", + type: "option", + value: Extract.ENCODING_LIST } ] }, "Extract IP addresses": { - module: "Default", + module: "Regex", description: "Extracts all IPv4 and IPv6 addresses.

Warning: Given a string 710.65.0.456, this will match 10.65.0.45 so always check the original input!", inputType: "string", outputType: "string", @@ -2206,7 +2212,7 @@ const OperationConfig = { ] }, "Extract email addresses": { - module: "Default", + module: "Regex", description: "Extracts all email addresses from the input.", inputType: "string", outputType: "string", @@ -2219,7 +2225,7 @@ const OperationConfig = { ] }, "Extract MAC addresses": { - module: "Default", + module: "Regex", description: "Extracts all Media Access Control (MAC) addresses from the input.", inputType: "string", outputType: "string", @@ -2232,7 +2238,7 @@ const OperationConfig = { ] }, "Extract URLs": { - module: "Default", + module: "Regex", description: "Extracts Uniform Resource Locators (URLs) from the input. The protocol (http, ftp etc.) is required otherwise there will be far too many false positives.", inputType: "string", outputType: "string", @@ -2245,7 +2251,7 @@ const OperationConfig = { ] }, "Extract domains": { - module: "Default", + module: "Regex", description: "Extracts domain names.
Note that this will not include paths. Use Extract URLs to find entire URLs.", inputType: "string", outputType: "string", @@ -2258,7 +2264,7 @@ const OperationConfig = { ] }, "Extract file paths": { - module: "Default", + module: "Regex", description: "Extracts anything that looks like a Windows or UNIX file path.

Note that if UNIX is selected, there will likely be a lot of false positives.", inputType: "string", outputType: "string", @@ -2281,7 +2287,7 @@ const OperationConfig = { ] }, "Extract dates": { - module: "Default", + module: "Regex", description: "Extracts dates in the following formatsDividers can be any of /, -, . or space", inputType: "string", outputType: "string", @@ -2294,7 +2300,7 @@ const OperationConfig = { ] }, "Regular expression": { - module: "Default", + module: "Regex", description: "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.", manualBake: true, inputType: "string", @@ -2303,7 +2309,7 @@ const OperationConfig = { { name: "Built in regexes", type: "populateOption", - value: StrUtils.REGEX_PRE_POPULATE, + value: Regex.REGEX_PRE_POPULATE, target: 1, }, { @@ -2314,22 +2320,22 @@ const OperationConfig = { { name: "Case insensitive", type: "boolean", - value: StrUtils.REGEX_CASE_INSENSITIVE + value: Regex.REGEX_CASE_INSENSITIVE }, { name: "Multiline matching", type: "boolean", - value: StrUtils.REGEX_MULTILINE_MATCHING + value: Regex.REGEX_MULTILINE_MATCHING }, { name: "Display total", type: "boolean", - value: StrUtils.DISPLAY_TOTAL + value: Regex.DISPLAY_TOTAL }, { name: "Output format", type: "option", - value: StrUtils.OUTPUT_FORMAT + value: Regex.OUTPUT_FORMAT }, ] }, diff --git a/src/core/config/modules/Default.js b/src/core/config/modules/Default.js index b36e00aa..27d2bd0a 100644 --- a/src/core/config/modules/Default.js +++ b/src/core/config/modules/Default.js @@ -10,7 +10,6 @@ import Convert from "../../operations/Convert.js"; import DateTime from "../../operations/DateTime.js"; import Endian from "../../operations/Endian.js"; import Entropy from "../../operations/Entropy.js"; -import Extract from "../../operations/Extract.js"; import FileType from "../../operations/FileType.js"; import Hexdump from "../../operations/Hexdump.js"; import HTML from 
"../../operations/HTML.js"; @@ -99,11 +98,9 @@ OpModules.Default = { "Format MAC addresses": MAC.runFormat, "Encode NetBIOS Name": NetBIOS.runEncodeName, "Decode NetBIOS Name": NetBIOS.runDecodeName, - "Regular expression": StrUtils.runRegex, "Offset checker": StrUtils.runOffsetChecker, "To Upper case": StrUtils.runUpper, "To Lower case": StrUtils.runLower, - "Find / Replace": StrUtils.runFindReplace, "Split": StrUtils.runSplit, "Filter": StrUtils.runFilter, "Escape string": StrUtils.runEscape, @@ -132,14 +129,6 @@ OpModules.Default = { "Translate DateTime Format": DateTime.runTranslateFormat, "From UNIX Timestamp": DateTime.runFromUnixTimestamp, "To UNIX Timestamp": DateTime.runToUnixTimestamp, - "Strings": Extract.runStrings, - "Extract IP addresses": Extract.runIp, - "Extract email addresses": Extract.runEmail, - "Extract MAC addresses": Extract.runMac, - "Extract URLs": Extract.runUrls, - "Extract domains": Extract.runDomains, - "Extract file paths": Extract.runFilePaths, - "Extract dates": Extract.runDates, "Microsoft Script Decoder": MS.runDecodeScript, "Entropy": Entropy.runEntropy, "Frequency distribution": Entropy.runFreqDistrib, diff --git a/src/core/config/modules/OpModules.js b/src/core/config/modules/OpModules.js index 3f3963c3..9a5e3ff5 100644 --- a/src/core/config/modules/OpModules.js +++ b/src/core/config/modules/OpModules.js @@ -18,6 +18,7 @@ import HTTPModule from "./HTTP.js"; import ImageModule from "./Image.js"; import JSBNModule from "./JSBN.js"; import PublicKeyModule from "./PublicKey.js"; +import RegexModule from "./Regex.js"; import ShellcodeModule from "./Shellcode.js"; import URLModule from "./URL.js"; @@ -34,6 +35,7 @@ Object.assign( ImageModule, JSBNModule, PublicKeyModule, + RegexModule, ShellcodeModule, URLModule ); diff --git a/src/core/config/modules/Regex.js b/src/core/config/modules/Regex.js new file mode 100644 index 00000000..f7dc3d31 --- /dev/null +++ b/src/core/config/modules/Regex.js @@ -0,0 +1,30 @@ +import Extract from 
"../../operations/Extract.js"; +import Regex from "../../operations/Regex.js"; + + +/** + * Regex module. + * + * Libraries: + * - XRegExp + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + */ +let OpModules = typeof self === "undefined" ? {} : self.OpModules || {}; + +OpModules.Regex = { + "Regular expression": Regex.runRegex, + "Find / Replace": Regex.runFindReplace, + "Strings": Extract.runStrings, + "Extract IP addresses": Extract.runIp, + "Extract email addresses": Extract.runEmail, + "Extract MAC addresses": Extract.runMac, + "Extract URLs": Extract.runUrls, + "Extract domains": Extract.runDomains, + "Extract file paths": Extract.runFilePaths, + "Extract dates": Extract.runDates, +}; + +export default OpModules; diff --git a/src/core/operations/Extract.js b/src/core/operations/Extract.js index 54b25b32..1885f163 100755 --- a/src/core/operations/Extract.js +++ b/src/core/operations/Extract.js @@ -1,3 +1,6 @@ +import XRegExp from "xregexp"; + + /** * Identifier extraction operations. * @@ -49,6 +52,11 @@ const Extract = { * @default */ DISPLAY_TOTAL: false, + /** + * @constant + * @default + */ + ENCODING_LIST: ["All", "Single byte", "16-bit littleendian", "16-bit bigendian"], /** * Strings operation. @@ -58,10 +66,27 @@ const Extract = { * @returns {string} */ runStrings: function(input, args) { - let minLen = args[0] || Extract.MIN_STRING_LEN, + const minLen = args[0] || Extract.MIN_STRING_LEN, displayTotal = args[1], - strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]", - regex = new RegExp(strings + "{" + minLen + ",}", "ig"); + encoding = args[2]; + let strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]"; + + switch (encoding) { + case "All": + strings = "(\x00?" 
+ strings + "\x00?)"; + break; + case "16-bit littleendian": + strings = "(" + strings + "\x00)"; + break; + case "16-bit bigendian": + strings = "(\x00" + strings + ")"; + break; + case "Single byte": + default: + break; + } + + const regex = new XRegExp(strings + "{" + minLen + ",}", "ig"); return Extract._search(input, regex, null, displayTotal); }, diff --git a/src/core/operations/Regex.js b/src/core/operations/Regex.js new file mode 100644 index 00000000..95916e3f --- /dev/null +++ b/src/core/operations/Regex.js @@ -0,0 +1,272 @@ +import XRegExp from "xregexp"; +import Utils from "../Utils.js"; + + +/** + * Regex operations. + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + * + * @namespace + */ +const Regex = { + + /** + * @constant + * @default + */ + REGEX_PRE_POPULATE: [ + { + name: "User defined", + value: "" + }, + { + name: "IPv4 address", + value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?" + }, + { + name: "IPv6 address", + value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})" + }, + { + name: "Email address", + value: "(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})" + }, + { + name: "URL", + value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?" + }, + { + name: "Domain", + value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b" + }, + { + name: "Windows file path", + value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?" 
+ }, + { + name: "UNIX file path", + value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+" + }, + { + name: "MAC address", + value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}" + }, + { + name: "Date (yyyy-mm-dd)", + value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])" + }, + { + name: "Date (dd/mm/yyyy)", + value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)" + }, + { + name: "Date (mm/dd/yyyy)", + value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)" + }, + { + name: "Strings", + value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}" + }, + ], + /** + * @constant + * @default + */ + REGEX_CASE_INSENSITIVE: true, + /** + * @constant + * @default + */ + REGEX_MULTILINE_MATCHING: true, + /** + * @constant + * @default + */ + OUTPUT_FORMAT: ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"], + /** + * @constant + * @default + */ + DISPLAY_TOTAL: false, + + /** + * Regular expression operation. 
+ * + * @param {string} input + * @param {Object[]} args + * @returns {html} + */ + runRegex: function(input, args) { + let userRegex = args[1], + i = args[2], + m = args[3], + displayTotal = args[4], + outputFormat = args[5], + modifiers = "g"; + + if (i) modifiers += "i"; + if (m) modifiers += "m"; + + if (userRegex && userRegex !== "^" && userRegex !== "$") { + try { + const regex = new XRegExp(userRegex, modifiers); + + switch (outputFormat) { + case "Highlight matches": + return Regex._regexHighlight(input, regex, displayTotal); + case "List matches": + return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, true, false)); + case "List capture groups": + return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, false, true)); + case "List matches with capture groups": + return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, true, true)); + default: + return "Error: Invalid output format"; + } + } catch (err) { + return "Invalid regex. Details: " + err.message; + } + } else { + return Utils.escapeHtml(input); + } + }, + + + /** + * @constant + * @default + */ + SEARCH_TYPE: ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"], + /** + * @constant + * @default + */ + FIND_REPLACE_GLOBAL: true, + /** + * @constant + * @default + */ + FIND_REPLACE_CASE: false, + /** + * @constant + * @default + */ + FIND_REPLACE_MULTILINE: true, + + /** + * Find / Replace operation. 
+ * + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + runFindReplace: function(input, args) { + let find = args[0].string, + type = args[0].option, + replace = args[1], + g = args[2], + i = args[3], + m = args[4], + modifiers = ""; + + if (g) modifiers += "g"; + if (i) modifiers += "i"; + if (m) modifiers += "m"; + + if (type === "Regex") { + find = new RegExp(find, modifiers); + return input.replace(find, replace); + } + + if (type.indexOf("Extended") === 0) { + find = Utils.parseEscapedChars(find); + } + + find = new RegExp(Utils.escapeRegex(find), modifiers); + + return input.replace(find, replace); + }, + + + /** + * Adds HTML highlights to matches within a string. + * + * @private + * @param {string} input + * @param {RegExp} regex + * @param {boolean} displayTotal + * @returns {string} + */ + _regexHighlight: function(input, regex, displayTotal) { + let output = "", + m, + hl = 1, + i = 0, + total = 0; + + while ((m = regex.exec(input))) { + // Add up to match + output += Utils.escapeHtml(input.slice(i, m.index)); + + // Add match with highlighting + output += "" + Utils.escapeHtml(m[0]) + ""; + + // Switch highlight + hl = hl === 1 ? 2 : 1; + + i = regex.lastIndex; + total++; + } + + // Add all after final match + output += Utils.escapeHtml(input.slice(i, input.length)); + + if (displayTotal) + output = "Total found: " + total + "\n\n" + output; + + return output; + }, + + + /** + * Creates a string listing the matches within a string. 
+ * + * @private + * @param {string} input + * @param {RegExp} regex + * @param {boolean} displayTotal + * @param {boolean} matches - Display full match + * @param {boolean} captureGroups - Display each of the capture groups separately + * @returns {string} + */ + _regexList: function(input, regex, displayTotal, matches, captureGroups) { + let output = "", + total = 0, + match; + + while ((match = regex.exec(input))) { + total++; + if (matches) { + output += match[0] + "\n"; + } + if (captureGroups) { + for (let i = 1; i < match.length; i++) { + if (matches) { + output += " Group " + i + ": "; + } + output += match[i] + "\n"; + } + } + } + + if (displayTotal) + output = "Total found: " + total + "\n\n" + output; + + return output; + }, +}; + +export default Regex; diff --git a/src/core/operations/StrUtils.js b/src/core/operations/StrUtils.js index 23b5eb26..c4f5664f 100755 --- a/src/core/operations/StrUtils.js +++ b/src/core/operations/StrUtils.js @@ -12,128 +12,6 @@ import Utils from "../Utils.js"; */ const StrUtils = { - /** - * @constant - * @default - */ - REGEX_PRE_POPULATE: [ - { - name: "User defined", - value: "" - }, - { - name: "IPv4 address", - value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?" - }, - { - name: "IPv6 address", - value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})" - }, - { - name: "Email address", - value: "(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})" - }, - { - name: "URL", - value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?" 
- }, - { - name: "Domain", - value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b" - }, - { - name: "Windows file path", - value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?" - }, - { - name: "UNIX file path", - value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+" - }, - { - name: "MAC address", - value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}" - }, - { - name: "Date (yyyy-mm-dd)", - value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])" - }, - { - name: "Date (dd/mm/yyyy)", - value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)" - }, - { - name: "Date (mm/dd/yyyy)", - value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)" - }, - { - name: "Strings", - value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}" - }, - ], - /** - * @constant - * @default - */ - REGEX_CASE_INSENSITIVE: true, - /** - * @constant - * @default - */ - REGEX_MULTILINE_MATCHING: true, - /** - * @constant - * @default - */ - OUTPUT_FORMAT: ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"], - /** - * @constant - * @default - */ - DISPLAY_TOTAL: false, - - /** - * Regular expression operation. 
- * - * @param {string} input - * @param {Object[]} args - * @returns {html} - */ - runRegex: function(input, args) { - let userRegex = args[1], - i = args[2], - m = args[3], - displayTotal = args[4], - outputFormat = args[5], - modifiers = "g"; - - if (i) modifiers += "i"; - if (m) modifiers += "m"; - - if (userRegex && userRegex !== "^" && userRegex !== "$") { - try { - const regex = new RegExp(userRegex, modifiers); - - switch (outputFormat) { - case "Highlight matches": - return StrUtils._regexHighlight(input, regex, displayTotal); - case "List matches": - return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, false)); - case "List capture groups": - return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, false, true)); - case "List matches with capture groups": - return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, true)); - default: - return "Error: Invalid output format"; - } - } catch (err) { - return "Invalid regex. Details: " + err.message; - } - } else { - return Utils.escapeHtml(input); - } - }, - - /** * @constant * @default @@ -183,62 +61,6 @@ const StrUtils = { }, - /** - * @constant - * @default - */ - SEARCH_TYPE: ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"], - /** - * @constant - * @default - */ - FIND_REPLACE_GLOBAL: true, - /** - * @constant - * @default - */ - FIND_REPLACE_CASE: false, - /** - * @constant - * @default - */ - FIND_REPLACE_MULTILINE: true, - - /** - * Find / Replace operation. 
- * - * @param {string} input - * @param {Object[]} args - * @returns {string} - */ - runFindReplace: function(input, args) { - let find = args[0].string, - type = args[0].option, - replace = args[1], - g = args[2], - i = args[3], - m = args[4], - modifiers = ""; - - if (g) modifiers += "g"; - if (i) modifiers += "i"; - if (m) modifiers += "m"; - - if (type === "Regex") { - find = new RegExp(find, modifiers); - return input.replace(find, replace); - } - - if (type.indexOf("Extended") === 0) { - find = Utils.parseEscapedChars(find); - } - - find = new RegExp(Utils.escapeRegex(find), modifiers); - - return input.replace(find, replace); - }, - - /** * @constant * @default @@ -507,84 +329,6 @@ const StrUtils = { }) .join(delimiter); }, - - - /** - * Adds HTML highlights to matches within a string. - * - * @private - * @param {string} input - * @param {RegExp} regex - * @param {boolean} displayTotal - * @returns {string} - */ - _regexHighlight: function(input, regex, displayTotal) { - let output = "", - m, - hl = 1, - i = 0, - total = 0; - - while ((m = regex.exec(input))) { - // Add up to match - output += Utils.escapeHtml(input.slice(i, m.index)); - - // Add match with highlighting - output += "" + Utils.escapeHtml(m[0]) + ""; - - // Switch highlight - hl = hl === 1 ? 2 : 1; - - i = regex.lastIndex; - total++; - } - - // Add all after final match - output += Utils.escapeHtml(input.slice(i, input.length)); - - if (displayTotal) - output = "Total found: " + total + "\n\n" + output; - - return output; - }, - - - /** - * Creates a string listing the matches within a string. 
- * - * @private - * @param {string} input - * @param {RegExp} regex - * @param {boolean} displayTotal - * @param {boolean} matches - Display full match - * @param {boolean} captureGroups - Display each of the capture groups separately - * @returns {string} - */ - _regexList: function(input, regex, displayTotal, matches, captureGroups) { - let output = "", - total = 0, - match; - - while ((match = regex.exec(input))) { - total++; - if (matches) { - output += match[0] + "\n"; - } - if (captureGroups) { - for (let i = 1; i < match.length; i++) { - if (matches) { - output += " Group " + i + ": "; - } - output += match[i] + "\n"; - } - } - } - - if (displayTotal) - output = "Total found: " + total + "\n\n" + output; - - return output; - }, }; export default StrUtils; From ec02b7dedaea35558176e7e4dd34669d2677ab20 Mon Sep 17 00:00:00 2001 From: n1474335 Date: Wed, 10 Jan 2018 19:44:25 +0000 Subject: [PATCH 2/5] Regexes are now checked for 0-length matches and incremented manually to avoid infinite loops --- src/core/config/OperationConfig.js | 3 --- src/core/operations/Extract.js | 5 +++++ src/core/operations/Regex.js | 10 ++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/core/config/OperationConfig.js b/src/core/config/OperationConfig.js index 50091bf9..5d6382f9 100755 --- a/src/core/config/OperationConfig.js +++ b/src/core/config/OperationConfig.js @@ -2061,7 +2061,6 @@ const OperationConfig = { "Find / Replace": { module: "Regex", description: "Replaces all occurrences of the first string with the second.

Includes support for regular expressions (regex), simple strings and extended strings (which support \\n, \\r, \\t, \\b, \\f and escaped hex bytes using \\x notation, e.g. \\x00 for a null byte).", - manualBake: true, inputType: "string", outputType: "string", args: [ @@ -2139,7 +2138,6 @@ const OperationConfig = { "Filter": { module: "Default", description: "Splits up the input using the specified delimiter and then filters each branch based on a regular expression.", - manualBake: true, inputType: "string", outputType: "string", args: [ @@ -2302,7 +2300,6 @@ const OperationConfig = { "Regular expression": { module: "Regex", description: "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.", - manualBake: true, inputType: "string", outputType: "html", args: [ diff --git a/src/core/operations/Extract.js b/src/core/operations/Extract.js index 1885f163..faf011e5 100755 --- a/src/core/operations/Extract.js +++ b/src/core/operations/Extract.js @@ -29,6 +29,11 @@ const Extract = { match; while ((match = searchRegex.exec(input))) { + // Moves pointer when an empty string is matched (prevents infinite loop) + if (match.index === searchRegex.lastIndex) { + searchRegex.lastIndex++; + } + if (removeRegex && removeRegex.test(match[0])) continue; total++; diff --git a/src/core/operations/Regex.js b/src/core/operations/Regex.js index 95916e3f..5cb374d1 100644 --- a/src/core/operations/Regex.js +++ b/src/core/operations/Regex.js @@ -208,6 +208,11 @@ const Regex = { total = 0; while ((m = regex.exec(input))) { + // Moves pointer when an empty string is matched (prevents infinite loop) + if (m.index === regex.lastIndex) { + regex.lastIndex++; + } + // Add up to match output += Utils.escapeHtml(input.slice(i, m.index)); @@ -248,6 +253,11 @@ const Regex = { match; while ((match = regex.exec(input))) { + // Moves pointer when an empty string is matched (prevents infinite loop) + if (match.index === 
regex.lastIndex) { + regex.lastIndex++; + } + total++; if (matches) { output += match[0] + "\n"; From f2c073798bada376bd1a0cb13c5d3fb92eaa02af Mon Sep 17 00:00:00 2001 From: n1474335 Date: Fri, 12 Jan 2018 23:09:27 +0000 Subject: [PATCH 3/5] 'Strings' now supports various different match types in ASCII and Unicode --- src/core/config/OperationConfig.js | 15 ++++--- src/core/operations/Extract.js | 68 ++++++++++++++++++++++++------ 2 files changed, 64 insertions(+), 19 deletions(-) diff --git a/src/core/config/OperationConfig.js b/src/core/config/OperationConfig.js index 5d6382f9..08c18d25 100755 --- a/src/core/config/OperationConfig.js +++ b/src/core/config/OperationConfig.js @@ -2164,20 +2164,25 @@ const OperationConfig = { inputType: "string", outputType: "string", args: [ + { + name: "Encoding", + type: "option", + value: Extract.ENCODING_LIST + }, { name: "Minimum length", type: "number", value: Extract.MIN_STRING_LEN }, + { + name: "Match", + type: "option", + value: Extract.STRING_MATCH_TYPE + }, { name: "Display total", type: "boolean", value: Extract.DISPLAY_TOTAL - }, - { - name: "Encoding", - type: "option", - value: Extract.ENCODING_LIST } ] }, diff --git a/src/core/operations/Extract.js b/src/core/operations/Extract.js index faf011e5..221b2823 100755 --- a/src/core/operations/Extract.js +++ b/src/core/operations/Extract.js @@ -51,17 +51,25 @@ const Extract = { * @constant * @default */ - MIN_STRING_LEN: 3, + MIN_STRING_LEN: 4, + /** + * @constant + * @default + */ + STRING_MATCH_TYPE: [ + "[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)", + "[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)" + ], + /** + * @constant + * @default + */ + ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"], /** * @constant * @default */ DISPLAY_TOTAL: false, - /** - * @constant - * @default - */ - ENCODING_LIST: ["All", "Single byte", "16-bit 
littleendian", "16-bit bigendian"], /** * Strings operation. @@ -71,27 +79,59 @@ const Extract = { * @returns {string} */ runStrings: function(input, args) { - const minLen = args[0] || Extract.MIN_STRING_LEN, - displayTotal = args[1], - encoding = args[2]; - let strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]"; + const encoding = args[0], + minLen = args[1], + matchType = args[2], + displayTotal = args[3], + alphanumeric = "A-Z\\d", + punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@", + printable = "\x20-\x7e", + uniAlphanumeric = "\\pL\\pN", + uniPunctuation = "\\pP\\pZ", + uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP"; + let strings = ""; + + switch (matchType) { + case "Alphanumeric + punctuation (A)": + strings = `[${alphanumeric + punctuation}]`; + break; + case "All printable chars (A)": + case "Null-terminated strings (A)": + strings = `[${printable}]`; + break; + case "Alphanumeric + punctuation (U)": + strings = `[${uniAlphanumeric + uniPunctuation}]`; + break; + case "All printable chars (U)": + case "Null-terminated strings (U)": + strings = `[${uniPrintable}]`; + break; + } + + // UTF-16 support is hacked in by allowing null bytes on either side of the matched chars switch (encoding) { case "All": - strings = "(\x00?" 
+ strings + "\x00?)"; + strings = `(\x00?${strings}\x00?)`; break; case "16-bit littleendian": - strings = "(" + strings + "\x00)"; + strings = `(${strings}\x00)`; break; case "16-bit bigendian": - strings = "(\x00" + strings + ")"; + strings = `(\x00${strings})`; break; case "Single byte": default: break; } - const regex = new XRegExp(strings + "{" + minLen + ",}", "ig"); + strings = `${strings}{${minLen},}`; + + if (matchType.includes("Null-terminated")) { + strings += "\x00"; + } + + const regex = new XRegExp(strings, "ig"); return Extract._search(input, regex, null, displayTotal); }, From b07c014b486a9b16bba6ced434dbfc8c12aad849 Mon Sep 17 00:00:00 2001 From: n1474335 Date: Fri, 12 Jan 2018 23:42:48 +0000 Subject: [PATCH 4/5] Added more modifiers to the Regex operation --- src/core/config/OperationConfig.js | 21 ++++++++++++++++++--- src/core/operations/Regex.js | 26 +++++++++++--------------- test/index.js | 1 + test/tests/operations/StrUtils.js | 15 --------------- 4 files changed, 30 insertions(+), 33 deletions(-) diff --git a/src/core/config/OperationConfig.js b/src/core/config/OperationConfig.js index 08c18d25..c5b9dd27 100755 --- a/src/core/config/OperationConfig.js +++ b/src/core/config/OperationConfig.js @@ -2322,12 +2322,27 @@ const OperationConfig = { { name: "Case insensitive", type: "boolean", - value: Regex.REGEX_CASE_INSENSITIVE + value: true }, { - name: "Multiline matching", + name: "^ and $ match at newlines", type: "boolean", - value: Regex.REGEX_MULTILINE_MATCHING + value: true + }, + { + name: "Dot matches all", + type: "boolean", + value: false + }, + { + name: "Unicode support", + type: "boolean", + value: false + }, + { + name: "Astral support", + type: "boolean", + value: false }, { name: "Display total", diff --git a/src/core/operations/Regex.js b/src/core/operations/Regex.js index 5cb374d1..9c6b2f8e 100644 --- a/src/core/operations/Regex.js +++ b/src/core/operations/Regex.js @@ -71,16 +71,6 @@ const Regex = { value: 
"[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}" }, ], - /** - * @constant - * @default - */ - REGEX_CASE_INSENSITIVE: true, - /** - * @constant - * @default - */ - REGEX_MULTILINE_MATCHING: true, /** * @constant * @default @@ -100,15 +90,21 @@ const Regex = { * @returns {html} */ runRegex: function(input, args) { - let userRegex = args[1], + const userRegex = args[1], i = args[2], m = args[3], - displayTotal = args[4], - outputFormat = args[5], - modifiers = "g"; + s = args[4], + u = args[5], + a = args[6], + displayTotal = args[7], + outputFormat = args[8]; + let modifiers = "g"; if (i) modifiers += "i"; if (m) modifiers += "m"; + if (s) modifiers += "s"; + if (u) modifiers += "u"; + if (a) modifiers += "A"; if (userRegex && userRegex !== "^" && userRegex !== "$") { try { @@ -275,7 +271,7 @@ const Regex = { if (displayTotal) output = "Total found: " + total + "\n\n" + output; - return output; + return output.slice(0, -1); }, }; diff --git a/test/index.js b/test/index.js index 5c397dea..e58d7e20 100644 --- a/test/index.js +++ b/test/index.js @@ -30,6 +30,7 @@ import "./tests/operations/MS.js"; import "./tests/operations/PHP.js"; import "./tests/operations/NetBIOS.js"; import "./tests/operations/OTP.js"; +import "./tests/operations/Regex.js"; import "./tests/operations/StrUtils.js"; import "./tests/operations/SeqUtils.js"; diff --git a/test/tests/operations/StrUtils.js b/test/tests/operations/StrUtils.js index 6e66b266..8110d067 100644 --- a/test/tests/operations/StrUtils.js +++ b/test/tests/operations/StrUtils.js @@ -8,21 +8,6 @@ import TestRegister from "../../TestRegister.js"; TestRegister.addTests([ - { - name: "Regex, non-HTML op", - input: "/<>", - expectedOutput: "/<>", - recipeConfig: [ - { - "op": "Regular expression", - "args": ["User defined", "", true, true, false, "Highlight matches"] - }, - { - "op": "Remove whitespace", - "args": [true, true, true, true, true, false] - } - ], - }, { name: "Diff, basic usage", input: "testing23\n\ntesting123", From 
71067939e392f02b7d92ed54e8967c08e414d3e6 Mon Sep 17 00:00:00 2001 From: n1474335 Date: Fri, 12 Jan 2018 23:51:51 +0000 Subject: [PATCH 5/5] Added Regex tests and updated description --- src/core/config/OperationConfig.js | 2 +- test/tests/operations/Regex.js | 59 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 test/tests/operations/Regex.js diff --git a/src/core/config/OperationConfig.js b/src/core/config/OperationConfig.js index c5b9dd27..a6cf4380 100755 --- a/src/core/config/OperationConfig.js +++ b/src/core/config/OperationConfig.js @@ -2304,7 +2304,7 @@ const OperationConfig = { }, "Regular expression": { module: "Regex", - description: "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.", + description: "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.

Supports extended regex syntax including the 'dot matches all' flag, named capture groups, full unicode coverage (including \\p{} categories and scripts as well as astral codes) and recursive matching.", inputType: "string", outputType: "html", args: [ diff --git a/test/tests/operations/Regex.js b/test/tests/operations/Regex.js new file mode 100644 index 00000000..dc16910f --- /dev/null +++ b/test/tests/operations/Regex.js @@ -0,0 +1,59 @@ +/** + * Regex tests. + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2017 + * @license Apache-2.0 + */ +import TestRegister from "../../TestRegister.js"; + +TestRegister.addTests([ + { + name: "Regex: non-HTML op", + input: "/<>", + expectedOutput: "/<>", + recipeConfig: [ + { + "op": "Regular expression", + "args": ["User defined", "", true, true, false, false, false, false, "Highlight matches"] + }, + { + "op": "Remove whitespace", + "args": [true, true, true, true, true, false] + } + ], + }, + { + name: "Regex: Dot matches all", + input: "Hello\nWorld", + expectedOutput: "Hello\nWorld", + recipeConfig: [ + { + "op": "Regular expression", + "args": ["User defined", ".+", true, true, true, false, false, false, "List matches"] + } + ], + }, + { + name: "Regex: Astral off", + input: "𝌆😆", + expectedOutput: "", + recipeConfig: [ + { + "op": "Regular expression", + "args": ["User defined", "\\pS", true, true, false, false, false, false, "List matches"] + } + ], + }, + { + name: "Regex: Astral on", + input: "𝌆😆", + expectedOutput: "𝌆\n😆", + recipeConfig: [ + { + "op": "Regular expression", + "args": ["User defined", "\\pS", true, true, false, false, true, false, "List matches"] + } + ], + } +]);