diff --git a/package-lock.json b/package-lock.json index aead053f..50df6bf0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10256,6 +10256,11 @@ "resolved": "https://registry.npmjs.org/xpath/-/xpath-0.0.27.tgz", "integrity": "sha512-fg03WRxtkCV6ohClePNAECYsmpKKTv5L8y/X3Dn1hQrec3POx2jHZ/0P2qQ6HvsrU1BmeqXcof3NGGueG6LxwQ==" }, + "xregexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/xregexp/-/xregexp-4.0.0.tgz", + "integrity": "sha512-PHyM+sQouu7xspQQwELlGwwd05mXUFqwFYfqPO0cC7x4fxyHnnuetmQr6CjJiafIDoH4MogHb9dOoJzR/Y4rFg==" + }, "xtend": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.1.tgz", diff --git a/package.json b/package.json index 1804e308..fb0e9bbb 100644 --- a/package.json +++ b/package.json @@ -103,6 +103,7 @@ "vkbeautify": "^0.99.3", "xmldom": "^0.1.27", "xpath": "0.0.27", + "xregexp": "^4.0.0", "zlibjs": "^0.3.1" }, "scripts": { diff --git a/src/core/config/OperationConfig.js b/src/core/config/OperationConfig.js index 41fbfe07..e1454d0a 100755 --- a/src/core/config/OperationConfig.js +++ b/src/core/config/OperationConfig.js @@ -30,6 +30,7 @@ import NetBIOS from "../operations/NetBIOS.js"; import PHP from "../operations/PHP.js"; import PublicKey from "../operations/PublicKey.js"; import Punycode from "../operations/Punycode.js"; +import Regex from "../operations/Regex.js"; import Rotate from "../operations/Rotate.js"; import SeqUtils from "../operations/SeqUtils.js"; import Shellcode from "../operations/Shellcode.js"; @@ -2058,9 +2059,8 @@ const OperationConfig = { args: [] }, "Find / Replace": { - module: "Default", + module: "Regex", description: "Replaces all occurrences of the first string with the second.

Includes support for regular expressions (regex), simple strings and extended strings (which support \\n, \\r, \\t, \\b, \\f and escaped hex bytes using \\x notation, e.g. \\x00 for a null byte).", - manualBake: true, inputType: "string", outputType: "string", args: [ @@ -2068,7 +2068,7 @@ const OperationConfig = { name: "Find", type: "toggleString", value: "", - toggleValues: StrUtils.SEARCH_TYPE + toggleValues: Regex.SEARCH_TYPE }, { name: "Replace", @@ -2078,17 +2078,17 @@ const OperationConfig = { { name: "Global match", type: "boolean", - value: StrUtils.FIND_REPLACE_GLOBAL, + value: Regex.FIND_REPLACE_GLOBAL, }, { name: "Case insensitive", type: "boolean", - value: StrUtils.FIND_REPLACE_CASE, + value: Regex.FIND_REPLACE_CASE, }, { name: "Multiline matching", type: "boolean", - value: StrUtils.FIND_REPLACE_MULTILINE, + value: Regex.FIND_REPLACE_MULTILINE, }, ] @@ -2138,7 +2138,6 @@ const OperationConfig = { "Filter": { module: "Default", description: "Splits up the input using the specified delimiter and then filters each branch based on a regular expression.", - manualBake: true, inputType: "string", outputType: "string", args: [ @@ -2160,16 +2159,26 @@ const OperationConfig = { ] }, "Strings": { - module: "Default", + module: "Regex", description: "Extracts all strings from the input.", inputType: "string", outputType: "string", args: [ + { + name: "Encoding", + type: "option", + value: Extract.ENCODING_LIST + }, { name: "Minimum length", type: "number", value: Extract.MIN_STRING_LEN }, + { + name: "Match", + type: "option", + value: Extract.STRING_MATCH_TYPE + }, { name: "Display total", type: "boolean", @@ -2178,7 +2187,7 @@ const OperationConfig = { ] }, "Extract IP addresses": { - module: "Default", + module: "Regex", description: "Extracts all IPv4 and IPv6 addresses.

Warning: Given a string 710.65.0.456, this will match 10.65.0.45 so always check the original input!", inputType: "string", outputType: "string", @@ -2206,7 +2215,7 @@ const OperationConfig = { ] }, "Extract email addresses": { - module: "Default", + module: "Regex", description: "Extracts all email addresses from the input.", inputType: "string", outputType: "string", @@ -2219,7 +2228,7 @@ const OperationConfig = { ] }, "Extract MAC addresses": { - module: "Default", + module: "Regex", description: "Extracts all Media Access Control (MAC) addresses from the input.", inputType: "string", outputType: "string", @@ -2232,7 +2241,7 @@ const OperationConfig = { ] }, "Extract URLs": { - module: "Default", + module: "Regex", description: "Extracts Uniform Resource Locators (URLs) from the input. The protocol (http, ftp etc.) is required otherwise there will be far too many false positives.", inputType: "string", outputType: "string", @@ -2245,7 +2254,7 @@ const OperationConfig = { ] }, "Extract domains": { - module: "Default", + module: "Regex", description: "Extracts domain names.
Note that this will not include paths. Use Extract URLs to find entire URLs.", inputType: "string", outputType: "string", @@ -2258,7 +2267,7 @@ const OperationConfig = { ] }, "Extract file paths": { - module: "Default", + module: "Regex", description: "Extracts anything that looks like a Windows or UNIX file path.

Note that if UNIX is selected, there will likely be a lot of false positives.", inputType: "string", outputType: "string", @@ -2281,7 +2290,7 @@ const OperationConfig = { ] }, "Extract dates": { - module: "Default", + module: "Regex", description: "Extracts dates in the following formatsDividers can be any of /, -, . or space", inputType: "string", outputType: "string", @@ -2294,16 +2303,15 @@ const OperationConfig = { ] }, "Regular expression": { - module: "Default", - description: "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.", - manualBake: true, + module: "Regex", + description: "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.

Supports extended regex syntax including the 'dot matches all' flag, named capture groups, full unicode coverage (including \\p{} categories and scripts as well as astral codes) and recursive matching.", inputType: "string", outputType: "html", args: [ { name: "Built in regexes", type: "populateOption", - value: StrUtils.REGEX_PRE_POPULATE, + value: Regex.REGEX_PRE_POPULATE, target: 1, }, { @@ -2314,22 +2322,37 @@ const OperationConfig = { { name: "Case insensitive", type: "boolean", - value: StrUtils.REGEX_CASE_INSENSITIVE + value: true }, { - name: "Multiline matching", + name: "^ and $ match at newlines", type: "boolean", - value: StrUtils.REGEX_MULTILINE_MATCHING + value: true + }, + { + name: "Dot matches all", + type: "boolean", + value: false + }, + { + name: "Unicode support", + type: "boolean", + value: false + }, + { + name: "Astral support", + type: "boolean", + value: false }, { name: "Display total", type: "boolean", - value: StrUtils.DISPLAY_TOTAL + value: Regex.DISPLAY_TOTAL }, { name: "Output format", type: "option", - value: StrUtils.OUTPUT_FORMAT + value: Regex.OUTPUT_FORMAT }, ] }, diff --git a/src/core/config/modules/Default.js b/src/core/config/modules/Default.js index 3963d943..1afb8bcc 100644 --- a/src/core/config/modules/Default.js +++ b/src/core/config/modules/Default.js @@ -10,7 +10,6 @@ import Convert from "../../operations/Convert.js"; import DateTime from "../../operations/DateTime.js"; import Endian from "../../operations/Endian.js"; import Entropy from "../../operations/Entropy.js"; -import Extract from "../../operations/Extract.js"; import FileType from "../../operations/FileType.js"; import Hexdump from "../../operations/Hexdump.js"; import HTML from "../../operations/HTML.js"; @@ -99,11 +98,9 @@ OpModules.Default = { "Format MAC addresses": MAC.runFormat, "Encode NetBIOS Name": NetBIOS.runEncodeName, "Decode NetBIOS Name": NetBIOS.runDecodeName, - "Regular expression": StrUtils.runRegex, "Offset checker": StrUtils.runOffsetChecker, 
"To Upper case": StrUtils.runUpper, "To Lower case": StrUtils.runLower, - "Find / Replace": StrUtils.runFindReplace, "Split": StrUtils.runSplit, "Filter": StrUtils.runFilter, "Escape string": StrUtils.runEscape, @@ -133,14 +130,6 @@ OpModules.Default = { "Translate DateTime Format": DateTime.runTranslateFormat, "From UNIX Timestamp": DateTime.runFromUnixTimestamp, "To UNIX Timestamp": DateTime.runToUnixTimestamp, - "Strings": Extract.runStrings, - "Extract IP addresses": Extract.runIp, - "Extract email addresses": Extract.runEmail, - "Extract MAC addresses": Extract.runMac, - "Extract URLs": Extract.runUrls, - "Extract domains": Extract.runDomains, - "Extract file paths": Extract.runFilePaths, - "Extract dates": Extract.runDates, "Microsoft Script Decoder": MS.runDecodeScript, "Entropy": Entropy.runEntropy, "Frequency distribution": Entropy.runFreqDistrib, diff --git a/src/core/config/modules/OpModules.js b/src/core/config/modules/OpModules.js index 3f3963c3..9a5e3ff5 100644 --- a/src/core/config/modules/OpModules.js +++ b/src/core/config/modules/OpModules.js @@ -18,6 +18,7 @@ import HTTPModule from "./HTTP.js"; import ImageModule from "./Image.js"; import JSBNModule from "./JSBN.js"; import PublicKeyModule from "./PublicKey.js"; +import RegexModule from "./Regex.js"; import ShellcodeModule from "./Shellcode.js"; import URLModule from "./URL.js"; @@ -34,6 +35,7 @@ Object.assign( ImageModule, JSBNModule, PublicKeyModule, + RegexModule, ShellcodeModule, URLModule ); diff --git a/src/core/config/modules/Regex.js b/src/core/config/modules/Regex.js new file mode 100644 index 00000000..f7dc3d31 --- /dev/null +++ b/src/core/config/modules/Regex.js @@ -0,0 +1,30 @@ +import Extract from "../../operations/Extract.js"; +import Regex from "../../operations/Regex.js"; + + +/** + * Regex module. 
+ * + * Libraries: + * - XRegExp + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + */ +let OpModules = typeof self === "undefined" ? {} : self.OpModules || {}; + +OpModules.Regex = { + "Regular expression": Regex.runRegex, + "Find / Replace": Regex.runFindReplace, + "Strings": Extract.runStrings, + "Extract IP addresses": Extract.runIp, + "Extract email addresses": Extract.runEmail, + "Extract MAC addresses": Extract.runMac, + "Extract URLs": Extract.runUrls, + "Extract domains": Extract.runDomains, + "Extract file paths": Extract.runFilePaths, + "Extract dates": Extract.runDates, +}; + +export default OpModules; diff --git a/src/core/operations/Extract.js b/src/core/operations/Extract.js index 54b25b32..221b2823 100755 --- a/src/core/operations/Extract.js +++ b/src/core/operations/Extract.js @@ -1,3 +1,6 @@ +import XRegExp from "xregexp"; + + /** * Identifier extraction operations. * @@ -26,6 +29,11 @@ const Extract = { match; while ((match = searchRegex.exec(input))) { + // Moves pointer when an empty string is matched (prevents infinite loop) + if (match.index === searchRegex.lastIndex) { + searchRegex.lastIndex++; + } + if (removeRegex && removeRegex.test(match[0])) continue; total++; @@ -43,7 +51,20 @@ const Extract = { * @constant * @default */ - MIN_STRING_LEN: 3, + MIN_STRING_LEN: 4, + /** + * @constant + * @default + */ + STRING_MATCH_TYPE: [ + "[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)", + "[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)" + ], + /** + * @constant + * @default + */ + ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"], /** * @constant * @default @@ -58,10 +79,59 @@ const Extract = { * @returns {string} */ runStrings: function(input, args) { - let minLen = args[0] || Extract.MIN_STRING_LEN, - displayTotal = args[1], - strings = 
"[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]", - regex = new RegExp(strings + "{" + minLen + ",}", "ig"); + const encoding = args[0], + minLen = args[1], + matchType = args[2], + displayTotal = args[3], + alphanumeric = "A-Z\\d", + punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@", + printable = "\x20-\x7e", + uniAlphanumeric = "\\pL\\pN", + uniPunctuation = "\\pP\\pZ", + uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP"; + + let strings = ""; + + switch (matchType) { + case "Alphanumeric + punctuation (A)": + strings = `[${alphanumeric + punctuation}]`; + break; + case "All printable chars (A)": + case "Null-terminated strings (A)": + strings = `[${printable}]`; + break; + case "Alphanumeric + punctuation (U)": + strings = `[${uniAlphanumeric + uniPunctuation}]`; + break; + case "All printable chars (U)": + case "Null-terminated strings (U)": + strings = `[${uniPrintable}]`; + break; + } + + // UTF-16 support is hacked in by allowing null bytes on either side of the matched chars + switch (encoding) { + case "All": + strings = `(\x00?${strings}\x00?)`; + break; + case "16-bit littleendian": + strings = `(${strings}\x00)`; + break; + case "16-bit bigendian": + strings = `(\x00${strings})`; + break; + case "Single byte": + default: + break; + } + + strings = `${strings}{${minLen},}`; + + if (matchType.includes("Null-terminated")) { + strings += "\x00"; + } + + const regex = new XRegExp(strings, "ig"); return Extract._search(input, regex, null, displayTotal); }, diff --git a/src/core/operations/Regex.js b/src/core/operations/Regex.js new file mode 100644 index 00000000..9c6b2f8e --- /dev/null +++ b/src/core/operations/Regex.js @@ -0,0 +1,278 @@ +import XRegExp from "xregexp"; +import Utils from "../Utils.js"; + + +/** + * Regex operations. 
+ * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + * + * @namespace + */ +const Regex = { + + /** + * @constant + * @default + */ + REGEX_PRE_POPULATE: [ + { + name: "User defined", + value: "" + }, + { + name: "IPv4 address", + value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?" + }, + { + name: "IPv6 address", + value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})" + }, + { + name: "Email address", + value: "(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})" + }, + { + name: "URL", + value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?" + }, + { + name: "Domain", + value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b" + }, + { + name: "Windows file path", + value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?" 
+ }, + { + name: "UNIX file path", + value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+" + }, + { + name: "MAC address", + value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}" + }, + { + name: "Date (yyyy-mm-dd)", + value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])" + }, + { + name: "Date (dd/mm/yyyy)", + value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)" + }, + { + name: "Date (mm/dd/yyyy)", + value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)" + }, + { + name: "Strings", + value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}" + }, + ], + /** + * @constant + * @default + */ + OUTPUT_FORMAT: ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"], + /** + * @constant + * @default + */ + DISPLAY_TOTAL: false, + + /** + * Regular expression operation. + * + * @param {string} input + * @param {Object[]} args + * @returns {html} + */ + runRegex: function(input, args) { + const userRegex = args[1], + i = args[2], + m = args[3], + s = args[4], + u = args[5], + a = args[6], + displayTotal = args[7], + outputFormat = args[8]; + let modifiers = "g"; + + if (i) modifiers += "i"; + if (m) modifiers += "m"; + if (s) modifiers += "s"; + if (u) modifiers += "u"; + if (a) modifiers += "A"; + + if (userRegex && userRegex !== "^" && userRegex !== "$") { + try { + const regex = new XRegExp(userRegex, modifiers); + + switch (outputFormat) { + case "Highlight matches": + return Regex._regexHighlight(input, regex, displayTotal); + case "List matches": + return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, true, false)); + case "List capture groups": + return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, false, true)); + case "List matches with capture groups": + return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, true, true)); + default: + return "Error: Invalid output format"; + } + } catch (err) { + return "Invalid 
regex. Details: " + err.message; + } + } else { + return Utils.escapeHtml(input); + } + }, + + + /** + * @constant + * @default + */ + SEARCH_TYPE: ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"], + /** + * @constant + * @default + */ + FIND_REPLACE_GLOBAL: true, + /** + * @constant + * @default + */ + FIND_REPLACE_CASE: false, + /** + * @constant + * @default + */ + FIND_REPLACE_MULTILINE: true, + + /** + * Find / Replace operation. + * + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + runFindReplace: function(input, args) { + let find = args[0].string, + type = args[0].option, + replace = args[1], + g = args[2], + i = args[3], + m = args[4], + modifiers = ""; + + if (g) modifiers += "g"; + if (i) modifiers += "i"; + if (m) modifiers += "m"; + + if (type === "Regex") { + find = new RegExp(find, modifiers); + return input.replace(find, replace); + } + + if (type.indexOf("Extended") === 0) { + find = Utils.parseEscapedChars(find); + } + + find = new RegExp(Utils.escapeRegex(find), modifiers); + + return input.replace(find, replace); + }, + + + /** + * Adds HTML highlights to matches within a string. + * + * @private + * @param {string} input + * @param {RegExp} regex + * @param {boolean} displayTotal + * @returns {string} + */ + _regexHighlight: function(input, regex, displayTotal) { + let output = "", + m, + hl = 1, + i = 0, + total = 0; + + while ((m = regex.exec(input))) { + // Moves pointer when an empty string is matched (prevents infinite loop) + if (m.index === regex.lastIndex) { + regex.lastIndex++; + } + + // Add up to match + output += Utils.escapeHtml(input.slice(i, m.index)); + + // Add match with highlighting + output += "" + Utils.escapeHtml(m[0]) + ""; + + // Switch highlight + hl = hl === 1 ? 
2 : 1; + + i = regex.lastIndex; + total++; + } + + // Add all after final match + output += Utils.escapeHtml(input.slice(i, input.length)); + + if (displayTotal) + output = "Total found: " + total + "\n\n" + output; + + return output; + }, + + + /** + * Creates a string listing the matches within a string. + * + * @private + * @param {string} input + * @param {RegExp} regex + * @param {boolean} displayTotal + * @param {boolean} matches - Display full match + * @param {boolean} captureGroups - Display each of the capture groups separately + * @returns {string} + */ + _regexList: function(input, regex, displayTotal, matches, captureGroups) { + let output = "", + total = 0, + match; + + while ((match = regex.exec(input))) { + // Moves pointer when an empty string is matched (prevents infinite loop) + if (match.index === regex.lastIndex) { + regex.lastIndex++; + } + + total++; + if (matches) { + output += match[0] + "\n"; + } + if (captureGroups) { + for (let i = 1; i < match.length; i++) { + if (matches) { + output += " Group " + i + ": "; + } + output += match[i] + "\n"; + } + } + } + + if (displayTotal) + output = "Total found: " + total + "\n\n" + output; + + return output.slice(0, -1); + }, +}; + +export default Regex; diff --git a/src/core/operations/StrUtils.js b/src/core/operations/StrUtils.js index dd15b327..5c1f8f18 100755 --- a/src/core/operations/StrUtils.js +++ b/src/core/operations/StrUtils.js @@ -12,128 +12,6 @@ import Utils from "../Utils.js"; */ const StrUtils = { - /** - * @constant - * @default - */ - REGEX_PRE_POPULATE: [ - { - name: "User defined", - value: "" - }, - { - name: "IPv4 address", - value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?" 
- }, - { - name: "IPv6 address", - value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})" - }, - { - name: "Email address", - value: "(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})" - }, - { - name: "URL", - value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?" - }, - { - name: "Domain", - value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b" - }, - { - name: "Windows file path", - value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?" - }, - { - name: "UNIX file path", - value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+" - }, - { - name: "MAC address", - value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}" - }, - { - name: "Date (yyyy-mm-dd)", - value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])" - }, - { - name: "Date (dd/mm/yyyy)", - value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)" - }, - { - name: "Date (mm/dd/yyyy)", - value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)" - }, - { - name: "Strings", - value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}" - }, - ], - /** - * @constant - * @default - */ - REGEX_CASE_INSENSITIVE: true, - /** - * @constant - * @default - */ - REGEX_MULTILINE_MATCHING: true, - /** - * @constant - * @default - */ - OUTPUT_FORMAT: ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"], - /** - * @constant - * @default - */ - DISPLAY_TOTAL: false, - - /** - * Regular expression operation. 
- * - * @param {string} input - * @param {Object[]} args - * @returns {html} - */ - runRegex: function(input, args) { - let userRegex = args[1], - i = args[2], - m = args[3], - displayTotal = args[4], - outputFormat = args[5], - modifiers = "g"; - - if (i) modifiers += "i"; - if (m) modifiers += "m"; - - if (userRegex && userRegex !== "^" && userRegex !== "$") { - try { - const regex = new RegExp(userRegex, modifiers); - - switch (outputFormat) { - case "Highlight matches": - return StrUtils._regexHighlight(input, regex, displayTotal); - case "List matches": - return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, false)); - case "List capture groups": - return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, false, true)); - case "List matches with capture groups": - return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, true)); - default: - return "Error: Invalid output format"; - } - } catch (err) { - return "Invalid regex. Details: " + err.message; - } - } else { - return Utils.escapeHtml(input); - } - }, - - /** * @constant * @default @@ -183,62 +61,6 @@ const StrUtils = { }, - /** - * @constant - * @default - */ - SEARCH_TYPE: ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"], - /** - * @constant - * @default - */ - FIND_REPLACE_GLOBAL: true, - /** - * @constant - * @default - */ - FIND_REPLACE_CASE: false, - /** - * @constant - * @default - */ - FIND_REPLACE_MULTILINE: true, - - /** - * Find / Replace operation. 
- * - * @param {string} input - * @param {Object[]} args - * @returns {string} - */ - runFindReplace: function(input, args) { - let find = args[0].string, - type = args[0].option, - replace = args[1], - g = args[2], - i = args[3], - m = args[4], - modifiers = ""; - - if (g) modifiers += "g"; - if (i) modifiers += "i"; - if (m) modifiers += "m"; - - if (type === "Regex") { - find = new RegExp(find, modifiers); - return input.replace(find, replace); - } - - if (type.indexOf("Extended") === 0) { - find = Utils.parseEscapedChars(find); - } - - find = new RegExp(Utils.escapeRegex(find), modifiers); - - return input.replace(find, replace); - }, - - /** * @constant * @default @@ -576,84 +398,6 @@ const StrUtils = { return dist.toString(); }, - - - /** - * Adds HTML highlights to matches within a string. - * - * @private - * @param {string} input - * @param {RegExp} regex - * @param {boolean} displayTotal - * @returns {string} - */ - _regexHighlight: function(input, regex, displayTotal) { - let output = "", - m, - hl = 1, - i = 0, - total = 0; - - while ((m = regex.exec(input))) { - // Add up to match - output += Utils.escapeHtml(input.slice(i, m.index)); - - // Add match with highlighting - output += "" + Utils.escapeHtml(m[0]) + ""; - - // Switch highlight - hl = hl === 1 ? 2 : 1; - - i = regex.lastIndex; - total++; - } - - // Add all after final match - output += Utils.escapeHtml(input.slice(i, input.length)); - - if (displayTotal) - output = "Total found: " + total + "\n\n" + output; - - return output; - }, - - - /** - * Creates a string listing the matches within a string. 
- * - * @private - * @param {string} input - * @param {RegExp} regex - * @param {boolean} displayTotal - * @param {boolean} matches - Display full match - * @param {boolean} captureGroups - Display each of the capture groups separately - * @returns {string} - */ - _regexList: function(input, regex, displayTotal, matches, captureGroups) { - let output = "", - total = 0, - match; - - while ((match = regex.exec(input))) { - total++; - if (matches) { - output += match[0] + "\n"; - } - if (captureGroups) { - for (let i = 1; i < match.length; i++) { - if (matches) { - output += " Group " + i + ": "; - } - output += match[i] + "\n"; - } - } - } - - if (displayTotal) - output = "Total found: " + total + "\n\n" + output; - - return output; - }, }; export default StrUtils; diff --git a/test/index.js b/test/index.js index 5c397dea..e58d7e20 100644 --- a/test/index.js +++ b/test/index.js @@ -30,6 +30,7 @@ import "./tests/operations/MS.js"; import "./tests/operations/PHP.js"; import "./tests/operations/NetBIOS.js"; import "./tests/operations/OTP.js"; +import "./tests/operations/Regex.js"; import "./tests/operations/StrUtils.js"; import "./tests/operations/SeqUtils.js"; diff --git a/test/tests/operations/Regex.js b/test/tests/operations/Regex.js new file mode 100644 index 00000000..dc16910f --- /dev/null +++ b/test/tests/operations/Regex.js @@ -0,0 +1,59 @@ +/** + * Regex tests. 
+ * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2017 + * @license Apache-2.0 + */ +import TestRegister from "../../TestRegister.js"; + +TestRegister.addTests([ + { + name: "Regex: non-HTML op", + input: "/<>", + expectedOutput: "/<>", + recipeConfig: [ + { + "op": "Regular expression", + "args": ["User defined", "", true, true, false, false, false, false, "Highlight matches"] + }, + { + "op": "Remove whitespace", + "args": [true, true, true, true, true, false] + } + ], + }, + { + name: "Regex: Dot matches all", + input: "Hello\nWorld", + expectedOutput: "Hello\nWorld", + recipeConfig: [ + { + "op": "Regular expression", + "args": ["User defined", ".+", true, true, true, false, false, false, "List matches"] + } + ], + }, + { + name: "Regex: Astral off", + input: "𝌆😆", + expectedOutput: "", + recipeConfig: [ + { + "op": "Regular expression", + "args": ["User defined", "\\pS", true, true, false, false, false, false, "List matches"] + } + ], + }, + { + name: "Regex: Astral on", + input: "𝌆😆", + expectedOutput: "𝌆\n😆", + recipeConfig: [ + { + "op": "Regular expression", + "args": ["User defined", "\\pS", true, true, false, false, true, false, "List matches"] + } + ], + } +]); diff --git a/test/tests/operations/StrUtils.js b/test/tests/operations/StrUtils.js index 6e66b266..8110d067 100644 --- a/test/tests/operations/StrUtils.js +++ b/test/tests/operations/StrUtils.js @@ -8,21 +8,6 @@ import TestRegister from "../../TestRegister.js"; TestRegister.addTests([ - { - name: "Regex, non-HTML op", - input: "/<>", - expectedOutput: "/<>", - recipeConfig: [ - { - "op": "Regular expression", - "args": ["User defined", "", true, true, false, "Highlight matches"] - }, - { - "op": "Remove whitespace", - "args": [true, true, true, true, true, false] - } - ], - }, { name: "Diff, basic usage", input: "testing23\n\ntesting123",