From 6a561185df58c0e008aa5f8e671d87d15d455e3b Mon Sep 17 00:00:00 2001
From: Matt C
Date: Mon, 28 May 2018 00:39:03 +0100
Subject: [PATCH] ESM: Ported RegEx operations

---
 src/core/operations/FindReplace.mjs       |  86 +++++++
 src/core/operations/RegularExpression.mjs | 263 ++++++++++++++++++++++
 test/index.mjs                            |   2 +-
 3 files changed, 350 insertions(+), 1 deletion(-)
 create mode 100644 src/core/operations/FindReplace.mjs
 create mode 100644 src/core/operations/RegularExpression.mjs

diff --git a/src/core/operations/FindReplace.mjs b/src/core/operations/FindReplace.mjs
new file mode 100644
index 00000000..b983e69a
--- /dev/null
+++ b/src/core/operations/FindReplace.mjs
@@ -0,0 +1,86 @@
+/**
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2018
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation";
+import Utils from "../Utils";
+
+/**
+ * Find / Replace operation
+ */
+class FindReplace extends Operation {
+
+    /**
+     * FindReplace constructor
+     */
+    constructor() {
+        super();
+
+        this.name = "Find / Replace";
+        this.module = "Regex";
+        this.description = "Replaces all occurrences of the first string with the second.<br><br>Includes support for regular expressions (regex), simple strings and extended strings (which support \\n, \\r, \\t, \\b, \\f and escaped hex bytes using \\x notation, e.g. \\x00 for a null byte).";
+        this.inputType = "string";
+        this.outputType = "string";
+        this.args = [
+            {
+                "name": "Find",
+                "type": "toggleString",
+                "value": "",
+                "toggleValues": ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"]
+            },
+            {
+                "name": "Replace",
+                "type": "binaryString",
+                "value": ""
+            },
+            {
+                "name": "Global match",
+                "type": "boolean",
+                "value": true
+            },
+            {
+                "name": "Case insensitive",
+                "type": "boolean",
+                "value": false
+            },
+            {
+                "name": "Multiline matching",
+                "type": "boolean",
+                "value": true
+            }
+        ];
+    }
+
+    /**
+     * @param {string} input - Text to search
+     * @param {Object[]} args - [find ({option, string}), replace, global, case insensitive, multiline]
+     * @returns {string} Input with matches of the find pattern replaced
+     */
+    run(input, args) {
+        const [{option: type}, replace, g, i, m] = args;
+        let find = args[0].string,
+            modifiers = "";
+
+        if (g) modifiers += "g";
+        if (i) modifiers += "i";
+        if (m) modifiers += "m";
+
+        if (type === "Regex") {
+            find = new RegExp(find, modifiers);
+            return input.replace(find, replace);
+        }
+
+        if (type.indexOf("Extended") === 0) {
+            find = Utils.parseEscapedChars(find);
+        }
+
+        find = new RegExp(Utils.escapeRegex(find), modifiers);
+
+        return input.replace(find, replace);
+    }
+
+}
+
+export default FindReplace;
diff --git a/src/core/operations/RegularExpression.mjs b/src/core/operations/RegularExpression.mjs
new file mode 100644
index 00000000..05f6fefb
--- /dev/null
+++ b/src/core/operations/RegularExpression.mjs
@@ -0,0 +1,263 @@
+/**
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2018
+ * @license Apache-2.0
+ */
+
+import XRegExp from "xregexp";
+import Operation from "../Operation";
+import Utils from "../Utils";
+
+/**
+ * Regular expression operation
+ */
+class RegularExpression extends Operation {
+
+    /**
+     * RegularExpression constructor
+     */
+    constructor() {
+        super();
+
+        this.name = "Regular expression";
+        this.module = "Regex";
+        this.description = "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.<br><br>Supports extended regex syntax including the 'dot matches all' flag, named capture groups, full unicode coverage (including \\p{} categories and scripts as well as astral codes) and recursive matching.";
+        this.inputType = "string";
+        this.outputType = "html";
+        this.args = [
+            {
+                "name": "Built in regexes",
+                "type": "populateOption",
+                "value": [
+                    {
+                        name: "User defined",
+                        value: ""
+                    },
+                    {
+                        name: "IPv4 address",
+                        value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?"
+                    },
+                    {
+                        name: "IPv6 address",
+                        value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})"
+                    },
+                    {
+                        name: "Email address",
+                        value: "\\b(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})\\b"
+                    },
+                    {
+                        name: "URL",
+                        value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?"
+                    },
+                    {
+                        name: "Domain",
+                        value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b"
+                    },
+                    {
+                        name: "Windows file path",
+                        value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)~]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?"
+                    },
+                    {
+                        name: "UNIX file path",
+                        value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+"
+                    },
+                    {
+                        name: "MAC address",
+                        value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}"
+                    },
+                    {
+                        name: "Date (yyyy-mm-dd)",
+                        value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])"
+                    },
+                    {
+                        name: "Date (dd/mm/yyyy)",
+                        value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)"
+                    },
+                    {
+                        name: "Date (mm/dd/yyyy)",
+                        value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)"
+                    },
+                    {
+                        name: "Strings",
+                        value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}"
+                    },
+                ],
+                "target": 1
+            },
+            {
+                "name": "Regex",
+                "type": "text",
+                "value": ""
+            },
+            {
+                "name": "Case insensitive",
+                "type": "boolean",
+                "value": true
+            },
+            {
+                "name": "^ and $ match at newlines",
+                "type": "boolean",
+                "value": true
+            },
+            {
+                "name": "Dot matches all",
+                "type": "boolean",
+                "value": false
+            },
+            {
+                "name": "Unicode support",
+                "type": "boolean",
+                "value": false
+            },
+            {
+                "name": "Astral support",
+                "type": "boolean",
+                "value": false
+            },
+            {
+                "name": "Display total",
+                "type": "boolean",
+                "value": false
+            },
+            {
+                "name": "Output format",
+                "type": "option",
+                "value": ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"]
+            }
+        ];
+    }
+
+    /**
+     * @param {string} input - Text to search
+     * @param {Object[]} args - [built-in regex (not read here), regex, i, m, s, u, A, display total, output format]
+     * @returns {html} Escaped or highlighted result, or an error message if the regex is invalid
+     */
+    run(input, args) {
+        const userRegex = args[1],
+            i = args[2],
+            m = args[3],
+            s = args[4],
+            u = args[5],
+            a = args[6],
+            displayTotal = args[7],
+            outputFormat = args[8];
+        let modifiers = "g";
+
+        if (i) modifiers += "i";
+        if (m) modifiers += "m";
+        if (s) modifiers += "s";
+        if (u) modifiers += "u";
+        if (a) modifiers += "A";
+
+        if (userRegex && userRegex !== "^" && userRegex !== "$") {
+            try {
+                const regex = new XRegExp(userRegex, modifiers);
+
+                switch (outputFormat) {
+                    case "Highlight matches":
+                        return regexHighlight(input, regex, displayTotal);
+                    case "List matches":
+                        return Utils.escapeHtml(regexList(input, regex, displayTotal, true, false));
+                    case "List capture groups":
+                        return Utils.escapeHtml(regexList(input, regex, displayTotal, false, true));
+                    case "List matches with capture groups":
+                        return Utils.escapeHtml(regexList(input, regex, displayTotal, true, true));
+                    default:
+                        return "Error: Invalid output format";
+                }
+            } catch (err) {
+                return "Invalid regex. Details: " + err.message;
+            }
+        } else {
+            return Utils.escapeHtml(input);
+        }
+    }
+
+}
+
+export default RegularExpression;
+
+/**
+ * Creates a string listing the matches within a string.
+ *
+ * @param {string} input - Text to search
+ * @param {RegExp} regex - Regex executed repeatedly via exec(); its lastIndex is advanced
+ * @param {boolean} displayTotal - Prepend a "Total found" line to the output
+ * @param {boolean} matches - Display full match
+ * @param {boolean} captureGroups - Display each of the capture groups separately
+ * @returns {string} Newline-separated list with the final character trimmed
+ */
+function regexList (input, regex, displayTotal, matches, captureGroups) {
+    let output = "",
+        total = 0,
+        match;
+
+    while ((match = regex.exec(input))) {
+        // Moves pointer when an empty string is matched (prevents infinite loop)
+        if (match.index === regex.lastIndex) {
+            regex.lastIndex++;
+        }
+
+        total++;
+        if (matches) {
+            output += match[0] + "\n";
+        }
+        if (captureGroups) {
+            for (let i = 1; i < match.length; i++) {
+                if (matches) {
+                    output += " Group " + i + ": ";
+                }
+                output += match[i] + "\n";
+            }
+        }
+    }
+
+    if (displayTotal)
+        output = "Total found: " + total + "\n\n" + output;
+
+    return output.slice(0, -1);
+}
+
+/**
+ * Adds HTML highlights to matches within a string.
+ *
+ * @private
+ * @param {string} input - Text to search; every emitted piece is HTML-escaped
+ * @param {RegExp} regex - Regex executed repeatedly via exec(); its lastIndex is advanced
+ * @param {boolean} displayTotal - Prepend a "Total found" line to the output
+ * @returns {string} HTML with each match wrapped in an alternating highlight span
+ */
+function regexHighlight (input, regex, displayTotal) {
+    let output = "",
+        m,
+        hl = 1,
+        i = 0,
+        total = 0;
+
+    while ((m = regex.exec(input))) {
+        // Moves pointer when an empty string is matched (prevents infinite loop)
+        if (m.index === regex.lastIndex) {
+            regex.lastIndex++;
+        }
+
+        // Add up to match
+        output += Utils.escapeHtml(input.slice(i, m.index));
+
+        // Add match with highlighting
+        output += "<span class='hl" + hl + "'>" + Utils.escapeHtml(m[0]) + "</span>";
+
+        // Switch highlight
+        hl = hl === 1 ? 2 : 1;
+
+        i = regex.lastIndex;
+        total++;
+    }
+
+    // Add all after final match
+    output += Utils.escapeHtml(input.slice(i, input.length));
+
+    if (displayTotal)
+        output = "Total found: " + total + "\n\n" + output;
+
+    return output;
+}
diff --git a/test/index.mjs b/test/index.mjs
index 11811e7e..7b235211 100644
--- a/test/index.mjs
+++ b/test/index.mjs
@@ -52,7 +52,7 @@ import "./tests/operations/PHP";
 import "./tests/operations/NetBIOS";
 import "./tests/operations/OTP";
 import "./tests/operations/PowerSet";
-// import "./tests/operations/Regex";
+import "./tests/operations/Regex";
 import "./tests/operations/Rotate";
 import "./tests/operations/StrUtils";
 import "./tests/operations/SeqUtils";