mirror of
https://github.com/gchq/CyberChef.git
synced 2024-11-02 14:11:02 +01:00
Merge branch 'feature-unicode-strings'
This commit is contained in:
commit
192d0ed8a6
5
package-lock.json
generated
5
package-lock.json
generated
@ -10256,6 +10256,11 @@
|
||||
"resolved": "https://registry.npmjs.org/xpath/-/xpath-0.0.27.tgz",
|
||||
"integrity": "sha512-fg03WRxtkCV6ohClePNAECYsmpKKTv5L8y/X3Dn1hQrec3POx2jHZ/0P2qQ6HvsrU1BmeqXcof3NGGueG6LxwQ=="
|
||||
},
|
||||
"xregexp": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/xregexp/-/xregexp-4.0.0.tgz",
|
||||
"integrity": "sha512-PHyM+sQouu7xspQQwELlGwwd05mXUFqwFYfqPO0cC7x4fxyHnnuetmQr6CjJiafIDoH4MogHb9dOoJzR/Y4rFg=="
|
||||
},
|
||||
"xtend": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.1.tgz",
|
||||
|
@ -103,6 +103,7 @@
|
||||
"vkbeautify": "^0.99.3",
|
||||
"xmldom": "^0.1.27",
|
||||
"xpath": "0.0.27",
|
||||
"xregexp": "^4.0.0",
|
||||
"zlibjs": "^0.3.1"
|
||||
},
|
||||
"scripts": {
|
||||
|
@ -30,6 +30,7 @@ import NetBIOS from "../operations/NetBIOS.js";
|
||||
import PHP from "../operations/PHP.js";
|
||||
import PublicKey from "../operations/PublicKey.js";
|
||||
import Punycode from "../operations/Punycode.js";
|
||||
import Regex from "../operations/Regex.js";
|
||||
import Rotate from "../operations/Rotate.js";
|
||||
import SeqUtils from "../operations/SeqUtils.js";
|
||||
import Shellcode from "../operations/Shellcode.js";
|
||||
@ -2058,9 +2059,8 @@ const OperationConfig = {
|
||||
args: []
|
||||
},
|
||||
"Find / Replace": {
|
||||
module: "Default",
|
||||
module: "Regex",
|
||||
description: "Replaces all occurrences of the first string with the second.<br><br> Includes support for regular expressions (regex), simple strings and extended strings (which support \\n, \\r, \\t, \\b, \\f and escaped hex bytes using \\x notation, e.g. \\x00 for a null byte).",
|
||||
manualBake: true,
|
||||
inputType: "string",
|
||||
outputType: "string",
|
||||
args: [
|
||||
@ -2068,7 +2068,7 @@ const OperationConfig = {
|
||||
name: "Find",
|
||||
type: "toggleString",
|
||||
value: "",
|
||||
toggleValues: StrUtils.SEARCH_TYPE
|
||||
toggleValues: Regex.SEARCH_TYPE
|
||||
},
|
||||
{
|
||||
name: "Replace",
|
||||
@ -2078,17 +2078,17 @@ const OperationConfig = {
|
||||
{
|
||||
name: "Global match",
|
||||
type: "boolean",
|
||||
value: StrUtils.FIND_REPLACE_GLOBAL,
|
||||
value: Regex.FIND_REPLACE_GLOBAL,
|
||||
},
|
||||
{
|
||||
name: "Case insensitive",
|
||||
type: "boolean",
|
||||
value: StrUtils.FIND_REPLACE_CASE,
|
||||
value: Regex.FIND_REPLACE_CASE,
|
||||
},
|
||||
{
|
||||
name: "Multiline matching",
|
||||
type: "boolean",
|
||||
value: StrUtils.FIND_REPLACE_MULTILINE,
|
||||
value: Regex.FIND_REPLACE_MULTILINE,
|
||||
},
|
||||
|
||||
]
|
||||
@ -2138,7 +2138,6 @@ const OperationConfig = {
|
||||
"Filter": {
|
||||
module: "Default",
|
||||
description: "Splits up the input using the specified delimiter and then filters each branch based on a regular expression.",
|
||||
manualBake: true,
|
||||
inputType: "string",
|
||||
outputType: "string",
|
||||
args: [
|
||||
@ -2160,16 +2159,26 @@ const OperationConfig = {
|
||||
]
|
||||
},
|
||||
"Strings": {
|
||||
module: "Default",
|
||||
module: "Regex",
|
||||
description: "Extracts all strings from the input.",
|
||||
inputType: "string",
|
||||
outputType: "string",
|
||||
args: [
|
||||
{
|
||||
name: "Encoding",
|
||||
type: "option",
|
||||
value: Extract.ENCODING_LIST
|
||||
},
|
||||
{
|
||||
name: "Minimum length",
|
||||
type: "number",
|
||||
value: Extract.MIN_STRING_LEN
|
||||
},
|
||||
{
|
||||
name: "Match",
|
||||
type: "option",
|
||||
value: Extract.STRING_MATCH_TYPE
|
||||
},
|
||||
{
|
||||
name: "Display total",
|
||||
type: "boolean",
|
||||
@ -2178,7 +2187,7 @@ const OperationConfig = {
|
||||
]
|
||||
},
|
||||
"Extract IP addresses": {
|
||||
module: "Default",
|
||||
module: "Regex",
|
||||
description: "Extracts all IPv4 and IPv6 addresses.<br><br>Warning: Given a string <code>710.65.0.456</code>, this will match <code>10.65.0.45</code> so always check the original input!",
|
||||
inputType: "string",
|
||||
outputType: "string",
|
||||
@ -2206,7 +2215,7 @@ const OperationConfig = {
|
||||
]
|
||||
},
|
||||
"Extract email addresses": {
|
||||
module: "Default",
|
||||
module: "Regex",
|
||||
description: "Extracts all email addresses from the input.",
|
||||
inputType: "string",
|
||||
outputType: "string",
|
||||
@ -2219,7 +2228,7 @@ const OperationConfig = {
|
||||
]
|
||||
},
|
||||
"Extract MAC addresses": {
|
||||
module: "Default",
|
||||
module: "Regex",
|
||||
description: "Extracts all Media Access Control (MAC) addresses from the input.",
|
||||
inputType: "string",
|
||||
outputType: "string",
|
||||
@ -2232,7 +2241,7 @@ const OperationConfig = {
|
||||
]
|
||||
},
|
||||
"Extract URLs": {
|
||||
module: "Default",
|
||||
module: "Regex",
|
||||
description: "Extracts Uniform Resource Locators (URLs) from the input. The protocol (http, ftp etc.) is required otherwise there will be far too many false positives.",
|
||||
inputType: "string",
|
||||
outputType: "string",
|
||||
@ -2245,7 +2254,7 @@ const OperationConfig = {
|
||||
]
|
||||
},
|
||||
"Extract domains": {
|
||||
module: "Default",
|
||||
module: "Regex",
|
||||
description: "Extracts domain names.<br>Note that this will not include paths. Use <strong>Extract URLs</strong> to find entire URLs.",
|
||||
inputType: "string",
|
||||
outputType: "string",
|
||||
@ -2258,7 +2267,7 @@ const OperationConfig = {
|
||||
]
|
||||
},
|
||||
"Extract file paths": {
|
||||
module: "Default",
|
||||
module: "Regex",
|
||||
description: "Extracts anything that looks like a Windows or UNIX file path.<br><br>Note that if UNIX is selected, there will likely be a lot of false positives.",
|
||||
inputType: "string",
|
||||
outputType: "string",
|
||||
@ -2281,7 +2290,7 @@ const OperationConfig = {
|
||||
]
|
||||
},
|
||||
"Extract dates": {
|
||||
module: "Default",
|
||||
module: "Regex",
|
||||
description: "Extracts dates in the following formats<ul><li><code>yyyy-mm-dd</code></li><li><code>dd/mm/yyyy</code></li><li><code>mm/dd/yyyy</code></li></ul>Dividers can be any of /, -, . or space",
|
||||
inputType: "string",
|
||||
outputType: "string",
|
||||
@ -2294,16 +2303,15 @@ const OperationConfig = {
|
||||
]
|
||||
},
|
||||
"Regular expression": {
|
||||
module: "Default",
|
||||
description: "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.",
|
||||
manualBake: true,
|
||||
module: "Regex",
|
||||
description: "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.<br><br>Supports extended regex syntax including the 'dot matches all' flag, named capture groups, full unicode coverage (including <code>\\p{}</code> categories and scripts as well as astral codes) and recursive matching.",
|
||||
inputType: "string",
|
||||
outputType: "html",
|
||||
args: [
|
||||
{
|
||||
name: "Built in regexes",
|
||||
type: "populateOption",
|
||||
value: StrUtils.REGEX_PRE_POPULATE,
|
||||
value: Regex.REGEX_PRE_POPULATE,
|
||||
target: 1,
|
||||
},
|
||||
{
|
||||
@ -2314,22 +2322,37 @@ const OperationConfig = {
|
||||
{
|
||||
name: "Case insensitive",
|
||||
type: "boolean",
|
||||
value: StrUtils.REGEX_CASE_INSENSITIVE
|
||||
value: true
|
||||
},
|
||||
{
|
||||
name: "Multiline matching",
|
||||
name: "^ and $ match at newlines",
|
||||
type: "boolean",
|
||||
value: StrUtils.REGEX_MULTILINE_MATCHING
|
||||
value: true
|
||||
},
|
||||
{
|
||||
name: "Dot matches all",
|
||||
type: "boolean",
|
||||
value: false
|
||||
},
|
||||
{
|
||||
name: "Unicode support",
|
||||
type: "boolean",
|
||||
value: false
|
||||
},
|
||||
{
|
||||
name: "Astral support",
|
||||
type: "boolean",
|
||||
value: false
|
||||
},
|
||||
{
|
||||
name: "Display total",
|
||||
type: "boolean",
|
||||
value: StrUtils.DISPLAY_TOTAL
|
||||
value: Regex.DISPLAY_TOTAL
|
||||
},
|
||||
{
|
||||
name: "Output format",
|
||||
type: "option",
|
||||
value: StrUtils.OUTPUT_FORMAT
|
||||
value: Regex.OUTPUT_FORMAT
|
||||
},
|
||||
]
|
||||
},
|
||||
|
@ -10,7 +10,6 @@ import Convert from "../../operations/Convert.js";
|
||||
import DateTime from "../../operations/DateTime.js";
|
||||
import Endian from "../../operations/Endian.js";
|
||||
import Entropy from "../../operations/Entropy.js";
|
||||
import Extract from "../../operations/Extract.js";
|
||||
import FileType from "../../operations/FileType.js";
|
||||
import Hexdump from "../../operations/Hexdump.js";
|
||||
import HTML from "../../operations/HTML.js";
|
||||
@ -99,11 +98,9 @@ OpModules.Default = {
|
||||
"Format MAC addresses": MAC.runFormat,
|
||||
"Encode NetBIOS Name": NetBIOS.runEncodeName,
|
||||
"Decode NetBIOS Name": NetBIOS.runDecodeName,
|
||||
"Regular expression": StrUtils.runRegex,
|
||||
"Offset checker": StrUtils.runOffsetChecker,
|
||||
"To Upper case": StrUtils.runUpper,
|
||||
"To Lower case": StrUtils.runLower,
|
||||
"Find / Replace": StrUtils.runFindReplace,
|
||||
"Split": StrUtils.runSplit,
|
||||
"Filter": StrUtils.runFilter,
|
||||
"Escape string": StrUtils.runEscape,
|
||||
@ -133,14 +130,6 @@ OpModules.Default = {
|
||||
"Translate DateTime Format": DateTime.runTranslateFormat,
|
||||
"From UNIX Timestamp": DateTime.runFromUnixTimestamp,
|
||||
"To UNIX Timestamp": DateTime.runToUnixTimestamp,
|
||||
"Strings": Extract.runStrings,
|
||||
"Extract IP addresses": Extract.runIp,
|
||||
"Extract email addresses": Extract.runEmail,
|
||||
"Extract MAC addresses": Extract.runMac,
|
||||
"Extract URLs": Extract.runUrls,
|
||||
"Extract domains": Extract.runDomains,
|
||||
"Extract file paths": Extract.runFilePaths,
|
||||
"Extract dates": Extract.runDates,
|
||||
"Microsoft Script Decoder": MS.runDecodeScript,
|
||||
"Entropy": Entropy.runEntropy,
|
||||
"Frequency distribution": Entropy.runFreqDistrib,
|
||||
|
@ -18,6 +18,7 @@ import HTTPModule from "./HTTP.js";
|
||||
import ImageModule from "./Image.js";
|
||||
import JSBNModule from "./JSBN.js";
|
||||
import PublicKeyModule from "./PublicKey.js";
|
||||
import RegexModule from "./Regex.js";
|
||||
import ShellcodeModule from "./Shellcode.js";
|
||||
import URLModule from "./URL.js";
|
||||
|
||||
@ -34,6 +35,7 @@ Object.assign(
|
||||
ImageModule,
|
||||
JSBNModule,
|
||||
PublicKeyModule,
|
||||
RegexModule,
|
||||
ShellcodeModule,
|
||||
URLModule
|
||||
);
|
||||
|
30
src/core/config/modules/Regex.js
Normal file
30
src/core/config/modules/Regex.js
Normal file
@ -0,0 +1,30 @@
|
||||
import Extract from "../../operations/Extract.js";
|
||||
import Regex from "../../operations/Regex.js";
|
||||
|
||||
|
||||
/**
|
||||
* Regex module.
|
||||
*
|
||||
* Libraries:
|
||||
* - XRegExp
|
||||
*
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2018
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
let OpModules = typeof self === "undefined" ? {} : self.OpModules || {};
|
||||
|
||||
OpModules.Regex = {
|
||||
"Regular expression": Regex.runRegex,
|
||||
"Find / Replace": Regex.runFindReplace,
|
||||
"Strings": Extract.runStrings,
|
||||
"Extract IP addresses": Extract.runIp,
|
||||
"Extract email addresses": Extract.runEmail,
|
||||
"Extract MAC addresses": Extract.runMac,
|
||||
"Extract URLs": Extract.runUrls,
|
||||
"Extract domains": Extract.runDomains,
|
||||
"Extract file paths": Extract.runFilePaths,
|
||||
"Extract dates": Extract.runDates,
|
||||
};
|
||||
|
||||
export default OpModules;
|
@ -1,3 +1,6 @@
|
||||
import XRegExp from "xregexp";
|
||||
|
||||
|
||||
/**
|
||||
* Identifier extraction operations.
|
||||
*
|
||||
@ -26,6 +29,11 @@ const Extract = {
|
||||
match;
|
||||
|
||||
while ((match = searchRegex.exec(input))) {
|
||||
// Moves pointer when an empty string is matched (prevents infinite loop)
|
||||
if (match.index === searchRegex.lastIndex) {
|
||||
searchRegex.lastIndex++;
|
||||
}
|
||||
|
||||
if (removeRegex && removeRegex.test(match[0]))
|
||||
continue;
|
||||
total++;
|
||||
@ -43,7 +51,20 @@ const Extract = {
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
MIN_STRING_LEN: 3,
|
||||
MIN_STRING_LEN: 4,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
STRING_MATCH_TYPE: [
|
||||
"[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)",
|
||||
"[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)"
|
||||
],
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"],
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
@ -58,10 +79,59 @@ const Extract = {
|
||||
* @returns {string}
|
||||
*/
|
||||
runStrings: function(input, args) {
|
||||
let minLen = args[0] || Extract.MIN_STRING_LEN,
|
||||
displayTotal = args[1],
|
||||
strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]",
|
||||
regex = new RegExp(strings + "{" + minLen + ",}", "ig");
|
||||
const encoding = args[0],
|
||||
minLen = args[1],
|
||||
matchType = args[2],
|
||||
displayTotal = args[3],
|
||||
alphanumeric = "A-Z\\d",
|
||||
punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@",
|
||||
printable = "\x20-\x7e",
|
||||
uniAlphanumeric = "\\pL\\pN",
|
||||
uniPunctuation = "\\pP\\pZ",
|
||||
uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP";
|
||||
|
||||
let strings = "";
|
||||
|
||||
switch (matchType) {
|
||||
case "Alphanumeric + punctuation (A)":
|
||||
strings = `[${alphanumeric + punctuation}]`;
|
||||
break;
|
||||
case "All printable chars (A)":
|
||||
case "Null-terminated strings (A)":
|
||||
strings = `[${printable}]`;
|
||||
break;
|
||||
case "Alphanumeric + punctuation (U)":
|
||||
strings = `[${uniAlphanumeric + uniPunctuation}]`;
|
||||
break;
|
||||
case "All printable chars (U)":
|
||||
case "Null-terminated strings (U)":
|
||||
strings = `[${uniPrintable}]`;
|
||||
break;
|
||||
}
|
||||
|
||||
// UTF-16 support is hacked in by allowing null bytes on either side of the matched chars
|
||||
switch (encoding) {
|
||||
case "All":
|
||||
strings = `(\x00?${strings}\x00?)`;
|
||||
break;
|
||||
case "16-bit littleendian":
|
||||
strings = `(${strings}\x00)`;
|
||||
break;
|
||||
case "16-bit bigendian":
|
||||
strings = `(\x00${strings})`;
|
||||
break;
|
||||
case "Single byte":
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
strings = `${strings}{${minLen},}`;
|
||||
|
||||
if (matchType.includes("Null-terminated")) {
|
||||
strings += "\x00";
|
||||
}
|
||||
|
||||
const regex = new XRegExp(strings, "ig");
|
||||
|
||||
return Extract._search(input, regex, null, displayTotal);
|
||||
},
|
||||
|
278
src/core/operations/Regex.js
Normal file
278
src/core/operations/Regex.js
Normal file
@ -0,0 +1,278 @@
|
||||
import XRegExp from "xregexp";
|
||||
import Utils from "../Utils.js";
|
||||
|
||||
|
||||
/**
|
||||
* Regex operations.
|
||||
*
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2018
|
||||
* @license Apache-2.0
|
||||
*
|
||||
* @namespace
|
||||
*/
|
||||
const Regex = {
|
||||
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
REGEX_PRE_POPULATE: [
|
||||
{
|
||||
name: "User defined",
|
||||
value: ""
|
||||
},
|
||||
{
|
||||
name: "IPv4 address",
|
||||
value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?"
|
||||
},
|
||||
{
|
||||
name: "IPv6 address",
|
||||
value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})"
|
||||
},
|
||||
{
|
||||
name: "Email address",
|
||||
value: "(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})"
|
||||
},
|
||||
{
|
||||
name: "URL",
|
||||
value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?"
|
||||
},
|
||||
{
|
||||
name: "Domain",
|
||||
value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b"
|
||||
},
|
||||
{
|
||||
name: "Windows file path",
|
||||
value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?"
|
||||
},
|
||||
{
|
||||
name: "UNIX file path",
|
||||
value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+"
|
||||
},
|
||||
{
|
||||
name: "MAC address",
|
||||
value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}"
|
||||
},
|
||||
{
|
||||
name: "Date (yyyy-mm-dd)",
|
||||
value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])"
|
||||
},
|
||||
{
|
||||
name: "Date (dd/mm/yyyy)",
|
||||
value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)"
|
||||
},
|
||||
{
|
||||
name: "Date (mm/dd/yyyy)",
|
||||
value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)"
|
||||
},
|
||||
{
|
||||
name: "Strings",
|
||||
value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}"
|
||||
},
|
||||
],
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
OUTPUT_FORMAT: ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"],
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
DISPLAY_TOTAL: false,
|
||||
|
||||
/**
|
||||
* Regular expression operation.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {html}
|
||||
*/
|
||||
runRegex: function(input, args) {
|
||||
const userRegex = args[1],
|
||||
i = args[2],
|
||||
m = args[3],
|
||||
s = args[4],
|
||||
u = args[5],
|
||||
a = args[6],
|
||||
displayTotal = args[7],
|
||||
outputFormat = args[8];
|
||||
let modifiers = "g";
|
||||
|
||||
if (i) modifiers += "i";
|
||||
if (m) modifiers += "m";
|
||||
if (s) modifiers += "s";
|
||||
if (u) modifiers += "u";
|
||||
if (a) modifiers += "A";
|
||||
|
||||
if (userRegex && userRegex !== "^" && userRegex !== "$") {
|
||||
try {
|
||||
const regex = new XRegExp(userRegex, modifiers);
|
||||
|
||||
switch (outputFormat) {
|
||||
case "Highlight matches":
|
||||
return Regex._regexHighlight(input, regex, displayTotal);
|
||||
case "List matches":
|
||||
return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, true, false));
|
||||
case "List capture groups":
|
||||
return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, false, true));
|
||||
case "List matches with capture groups":
|
||||
return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, true, true));
|
||||
default:
|
||||
return "Error: Invalid output format";
|
||||
}
|
||||
} catch (err) {
|
||||
return "Invalid regex. Details: " + err.message;
|
||||
}
|
||||
} else {
|
||||
return Utils.escapeHtml(input);
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
SEARCH_TYPE: ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"],
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
FIND_REPLACE_GLOBAL: true,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
FIND_REPLACE_CASE: false,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
FIND_REPLACE_MULTILINE: true,
|
||||
|
||||
/**
|
||||
* Find / Replace operation.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
runFindReplace: function(input, args) {
|
||||
let find = args[0].string,
|
||||
type = args[0].option,
|
||||
replace = args[1],
|
||||
g = args[2],
|
||||
i = args[3],
|
||||
m = args[4],
|
||||
modifiers = "";
|
||||
|
||||
if (g) modifiers += "g";
|
||||
if (i) modifiers += "i";
|
||||
if (m) modifiers += "m";
|
||||
|
||||
if (type === "Regex") {
|
||||
find = new RegExp(find, modifiers);
|
||||
return input.replace(find, replace);
|
||||
}
|
||||
|
||||
if (type.indexOf("Extended") === 0) {
|
||||
find = Utils.parseEscapedChars(find);
|
||||
}
|
||||
|
||||
find = new RegExp(Utils.escapeRegex(find), modifiers);
|
||||
|
||||
return input.replace(find, replace);
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* Adds HTML highlights to matches within a string.
|
||||
*
|
||||
* @private
|
||||
* @param {string} input
|
||||
* @param {RegExp} regex
|
||||
* @param {boolean} displayTotal
|
||||
* @returns {string}
|
||||
*/
|
||||
_regexHighlight: function(input, regex, displayTotal) {
|
||||
let output = "",
|
||||
m,
|
||||
hl = 1,
|
||||
i = 0,
|
||||
total = 0;
|
||||
|
||||
while ((m = regex.exec(input))) {
|
||||
// Moves pointer when an empty string is matched (prevents infinite loop)
|
||||
if (m.index === regex.lastIndex) {
|
||||
regex.lastIndex++;
|
||||
}
|
||||
|
||||
// Add up to match
|
||||
output += Utils.escapeHtml(input.slice(i, m.index));
|
||||
|
||||
// Add match with highlighting
|
||||
output += "<span class='hl"+hl+"'>" + Utils.escapeHtml(m[0]) + "</span>";
|
||||
|
||||
// Switch highlight
|
||||
hl = hl === 1 ? 2 : 1;
|
||||
|
||||
i = regex.lastIndex;
|
||||
total++;
|
||||
}
|
||||
|
||||
// Add all after final match
|
||||
output += Utils.escapeHtml(input.slice(i, input.length));
|
||||
|
||||
if (displayTotal)
|
||||
output = "Total found: " + total + "\n\n" + output;
|
||||
|
||||
return output;
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* Creates a string listing the matches within a string.
|
||||
*
|
||||
* @private
|
||||
* @param {string} input
|
||||
* @param {RegExp} regex
|
||||
* @param {boolean} displayTotal
|
||||
* @param {boolean} matches - Display full match
|
||||
* @param {boolean} captureGroups - Display each of the capture groups separately
|
||||
* @returns {string}
|
||||
*/
|
||||
_regexList: function(input, regex, displayTotal, matches, captureGroups) {
|
||||
let output = "",
|
||||
total = 0,
|
||||
match;
|
||||
|
||||
while ((match = regex.exec(input))) {
|
||||
// Moves pointer when an empty string is matched (prevents infinite loop)
|
||||
if (match.index === regex.lastIndex) {
|
||||
regex.lastIndex++;
|
||||
}
|
||||
|
||||
total++;
|
||||
if (matches) {
|
||||
output += match[0] + "\n";
|
||||
}
|
||||
if (captureGroups) {
|
||||
for (let i = 1; i < match.length; i++) {
|
||||
if (matches) {
|
||||
output += " Group " + i + ": ";
|
||||
}
|
||||
output += match[i] + "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (displayTotal)
|
||||
output = "Total found: " + total + "\n\n" + output;
|
||||
|
||||
return output.slice(0, -1);
|
||||
},
|
||||
};
|
||||
|
||||
export default Regex;
|
@ -12,128 +12,6 @@ import Utils from "../Utils.js";
|
||||
*/
|
||||
const StrUtils = {
|
||||
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
REGEX_PRE_POPULATE: [
|
||||
{
|
||||
name: "User defined",
|
||||
value: ""
|
||||
},
|
||||
{
|
||||
name: "IPv4 address",
|
||||
value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?"
|
||||
},
|
||||
{
|
||||
name: "IPv6 address",
|
||||
value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})"
|
||||
},
|
||||
{
|
||||
name: "Email address",
|
||||
value: "(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})"
|
||||
},
|
||||
{
|
||||
name: "URL",
|
||||
value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?"
|
||||
},
|
||||
{
|
||||
name: "Domain",
|
||||
value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b"
|
||||
},
|
||||
{
|
||||
name: "Windows file path",
|
||||
value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?"
|
||||
},
|
||||
{
|
||||
name: "UNIX file path",
|
||||
value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+"
|
||||
},
|
||||
{
|
||||
name: "MAC address",
|
||||
value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}"
|
||||
},
|
||||
{
|
||||
name: "Date (yyyy-mm-dd)",
|
||||
value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])"
|
||||
},
|
||||
{
|
||||
name: "Date (dd/mm/yyyy)",
|
||||
value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)"
|
||||
},
|
||||
{
|
||||
name: "Date (mm/dd/yyyy)",
|
||||
value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)"
|
||||
},
|
||||
{
|
||||
name: "Strings",
|
||||
value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}"
|
||||
},
|
||||
],
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
REGEX_CASE_INSENSITIVE: true,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
REGEX_MULTILINE_MATCHING: true,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
OUTPUT_FORMAT: ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"],
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
DISPLAY_TOTAL: false,
|
||||
|
||||
/**
|
||||
* Regular expression operation.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {html}
|
||||
*/
|
||||
runRegex: function(input, args) {
|
||||
let userRegex = args[1],
|
||||
i = args[2],
|
||||
m = args[3],
|
||||
displayTotal = args[4],
|
||||
outputFormat = args[5],
|
||||
modifiers = "g";
|
||||
|
||||
if (i) modifiers += "i";
|
||||
if (m) modifiers += "m";
|
||||
|
||||
if (userRegex && userRegex !== "^" && userRegex !== "$") {
|
||||
try {
|
||||
const regex = new RegExp(userRegex, modifiers);
|
||||
|
||||
switch (outputFormat) {
|
||||
case "Highlight matches":
|
||||
return StrUtils._regexHighlight(input, regex, displayTotal);
|
||||
case "List matches":
|
||||
return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, false));
|
||||
case "List capture groups":
|
||||
return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, false, true));
|
||||
case "List matches with capture groups":
|
||||
return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, true));
|
||||
default:
|
||||
return "Error: Invalid output format";
|
||||
}
|
||||
} catch (err) {
|
||||
return "Invalid regex. Details: " + err.message;
|
||||
}
|
||||
} else {
|
||||
return Utils.escapeHtml(input);
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
@ -183,62 +61,6 @@ const StrUtils = {
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
SEARCH_TYPE: ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"],
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
FIND_REPLACE_GLOBAL: true,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
FIND_REPLACE_CASE: false,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
FIND_REPLACE_MULTILINE: true,
|
||||
|
||||
/**
|
||||
* Find / Replace operation.
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
runFindReplace: function(input, args) {
|
||||
let find = args[0].string,
|
||||
type = args[0].option,
|
||||
replace = args[1],
|
||||
g = args[2],
|
||||
i = args[3],
|
||||
m = args[4],
|
||||
modifiers = "";
|
||||
|
||||
if (g) modifiers += "g";
|
||||
if (i) modifiers += "i";
|
||||
if (m) modifiers += "m";
|
||||
|
||||
if (type === "Regex") {
|
||||
find = new RegExp(find, modifiers);
|
||||
return input.replace(find, replace);
|
||||
}
|
||||
|
||||
if (type.indexOf("Extended") === 0) {
|
||||
find = Utils.parseEscapedChars(find);
|
||||
}
|
||||
|
||||
find = new RegExp(Utils.escapeRegex(find), modifiers);
|
||||
|
||||
return input.replace(find, replace);
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
@ -576,84 +398,6 @@ const StrUtils = {
|
||||
|
||||
return dist.toString();
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* Adds HTML highlights to matches within a string.
|
||||
*
|
||||
* @private
|
||||
* @param {string} input
|
||||
* @param {RegExp} regex
|
||||
* @param {boolean} displayTotal
|
||||
* @returns {string}
|
||||
*/
|
||||
_regexHighlight: function(input, regex, displayTotal) {
|
||||
let output = "",
|
||||
m,
|
||||
hl = 1,
|
||||
i = 0,
|
||||
total = 0;
|
||||
|
||||
while ((m = regex.exec(input))) {
|
||||
// Add up to match
|
||||
output += Utils.escapeHtml(input.slice(i, m.index));
|
||||
|
||||
// Add match with highlighting
|
||||
output += "<span class='hl"+hl+"'>" + Utils.escapeHtml(m[0]) + "</span>";
|
||||
|
||||
// Switch highlight
|
||||
hl = hl === 1 ? 2 : 1;
|
||||
|
||||
i = regex.lastIndex;
|
||||
total++;
|
||||
}
|
||||
|
||||
// Add all after final match
|
||||
output += Utils.escapeHtml(input.slice(i, input.length));
|
||||
|
||||
if (displayTotal)
|
||||
output = "Total found: " + total + "\n\n" + output;
|
||||
|
||||
return output;
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* Creates a string listing the matches within a string.
|
||||
*
|
||||
* @private
|
||||
* @param {string} input
|
||||
* @param {RegExp} regex
|
||||
* @param {boolean} displayTotal
|
||||
* @param {boolean} matches - Display full match
|
||||
* @param {boolean} captureGroups - Display each of the capture groups separately
|
||||
* @returns {string}
|
||||
*/
|
||||
_regexList: function(input, regex, displayTotal, matches, captureGroups) {
|
||||
let output = "",
|
||||
total = 0,
|
||||
match;
|
||||
|
||||
while ((match = regex.exec(input))) {
|
||||
total++;
|
||||
if (matches) {
|
||||
output += match[0] + "\n";
|
||||
}
|
||||
if (captureGroups) {
|
||||
for (let i = 1; i < match.length; i++) {
|
||||
if (matches) {
|
||||
output += " Group " + i + ": ";
|
||||
}
|
||||
output += match[i] + "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (displayTotal)
|
||||
output = "Total found: " + total + "\n\n" + output;
|
||||
|
||||
return output;
|
||||
},
|
||||
};
|
||||
|
||||
export default StrUtils;
|
||||
|
@ -30,6 +30,7 @@ import "./tests/operations/MS.js";
|
||||
import "./tests/operations/PHP.js";
|
||||
import "./tests/operations/NetBIOS.js";
|
||||
import "./tests/operations/OTP.js";
|
||||
import "./tests/operations/Regex.js";
|
||||
import "./tests/operations/StrUtils.js";
|
||||
import "./tests/operations/SeqUtils.js";
|
||||
|
||||
|
59
test/tests/operations/Regex.js
Normal file
59
test/tests/operations/Regex.js
Normal file
@ -0,0 +1,59 @@
|
||||
/**
|
||||
* StrUtils tests.
|
||||
*
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2017
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
import TestRegister from "../../TestRegister.js";
|
||||
|
||||
TestRegister.addTests([
|
||||
{
|
||||
name: "Regex: non-HTML op",
|
||||
input: "/<>",
|
||||
expectedOutput: "/<>",
|
||||
recipeConfig: [
|
||||
{
|
||||
"op": "Regular expression",
|
||||
"args": ["User defined", "", true, true, false, false, false, false, "Highlight matches"]
|
||||
},
|
||||
{
|
||||
"op": "Remove whitespace",
|
||||
"args": [true, true, true, true, true, false]
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
name: "Regex: Dot matches all",
|
||||
input: "Hello\nWorld",
|
||||
expectedOutput: "Hello\nWorld",
|
||||
recipeConfig: [
|
||||
{
|
||||
"op": "Regular expression",
|
||||
"args": ["User defined", ".+", true, true, true, false, false, false, "List matches"]
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
name: "Regex: Astral off",
|
||||
input: "𝌆😆",
|
||||
expectedOutput: "",
|
||||
recipeConfig: [
|
||||
{
|
||||
"op": "Regular expression",
|
||||
"args": ["User defined", "\\pS", true, true, false, false, false, false, "List matches"]
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
name: "Regex: Astral on",
|
||||
input: "𝌆😆",
|
||||
expectedOutput: "𝌆\n😆",
|
||||
recipeConfig: [
|
||||
{
|
||||
"op": "Regular expression",
|
||||
"args": ["User defined", "\\pS", true, true, false, false, true, false, "List matches"]
|
||||
}
|
||||
],
|
||||
}
|
||||
]);
|
@ -8,21 +8,6 @@
|
||||
import TestRegister from "../../TestRegister.js";
|
||||
|
||||
TestRegister.addTests([
|
||||
{
|
||||
name: "Regex, non-HTML op",
|
||||
input: "/<>",
|
||||
expectedOutput: "/<>",
|
||||
recipeConfig: [
|
||||
{
|
||||
"op": "Regular expression",
|
||||
"args": ["User defined", "", true, true, false, "Highlight matches"]
|
||||
},
|
||||
{
|
||||
"op": "Remove whitespace",
|
||||
"args": [true, true, true, true, true, false]
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
name: "Diff, basic usage",
|
||||
input: "testing23\n\ntesting123",
|
||||
|
Loading…
Reference in New Issue
Block a user