mirror of
https://github.com/gchq/CyberChef.git
synced 2024-11-16 08:58:30 +01:00
'Strings' now supports various different match types in ASCII and Unicode
This commit is contained in:
parent
ec02b7deda
commit
f2c073798b
2 changed files with 64 additions and 19 deletions
|
@ -2164,20 +2164,25 @@ const OperationConfig = {
|
|||
inputType: "string",
|
||||
outputType: "string",
|
||||
args: [
|
||||
{
|
||||
name: "Encoding",
|
||||
type: "option",
|
||||
value: Extract.ENCODING_LIST
|
||||
},
|
||||
{
|
||||
name: "Minimum length",
|
||||
type: "number",
|
||||
value: Extract.MIN_STRING_LEN
|
||||
},
|
||||
{
|
||||
name: "Match",
|
||||
type: "option",
|
||||
value: Extract.STRING_MATCH_TYPE
|
||||
},
|
||||
{
|
||||
name: "Display total",
|
||||
type: "boolean",
|
||||
value: Extract.DISPLAY_TOTAL
|
||||
},
|
||||
{
|
||||
name: "Encoding",
|
||||
type: "option",
|
||||
value: Extract.ENCODING_LIST
|
||||
}
|
||||
]
|
||||
},
|
||||
|
|
|
@ -51,17 +51,25 @@ const Extract = {
|
|||
* @constant
|
||||
* @default
|
||||
*/
|
||||
MIN_STRING_LEN: 3,
|
||||
MIN_STRING_LEN: 4,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
STRING_MATCH_TYPE: [
|
||||
"[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)",
|
||||
"[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)"
|
||||
],
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"],
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
DISPLAY_TOTAL: false,
|
||||
/**
|
||||
* @constant
|
||||
* @default
|
||||
*/
|
||||
ENCODING_LIST: ["All", "Single byte", "16-bit littleendian", "16-bit bigendian"],
|
||||
|
||||
/**
|
||||
* Strings operation.
|
||||
|
@ -71,27 +79,59 @@ const Extract = {
|
|||
* @returns {string}
|
||||
*/
|
||||
runStrings: function(input, args) {
|
||||
const minLen = args[0] || Extract.MIN_STRING_LEN,
|
||||
displayTotal = args[1],
|
||||
encoding = args[2];
|
||||
let strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]";
|
||||
const encoding = args[0],
|
||||
minLen = args[1],
|
||||
matchType = args[2],
|
||||
displayTotal = args[3],
|
||||
alphanumeric = "A-Z\\d",
|
||||
punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@",
|
||||
printable = "\x20-\x7e",
|
||||
uniAlphanumeric = "\\pL\\pN",
|
||||
uniPunctuation = "\\pP\\pZ",
|
||||
uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP";
|
||||
|
||||
let strings = "";
|
||||
|
||||
switch (matchType) {
|
||||
case "Alphanumeric + punctuation (A)":
|
||||
strings = `[${alphanumeric + punctuation}]`;
|
||||
break;
|
||||
case "All printable chars (A)":
|
||||
case "Null-terminated strings (A)":
|
||||
strings = `[${printable}]`;
|
||||
break;
|
||||
case "Alphanumeric + punctuation (U)":
|
||||
strings = `[${uniAlphanumeric + uniPunctuation}]`;
|
||||
break;
|
||||
case "All printable chars (U)":
|
||||
case "Null-terminated strings (U)":
|
||||
strings = `[${uniPrintable}]`;
|
||||
break;
|
||||
}
|
||||
|
||||
// UTF-16 support is hacked in by allowing null bytes on either side of the matched chars
|
||||
switch (encoding) {
|
||||
case "All":
|
||||
strings = "(\x00?" + strings + "\x00?)";
|
||||
strings = `(\x00?${strings}\x00?)`;
|
||||
break;
|
||||
case "16-bit littleendian":
|
||||
strings = "(" + strings + "\x00)";
|
||||
strings = `(${strings}\x00)`;
|
||||
break;
|
||||
case "16-bit bigendian":
|
||||
strings = "(\x00" + strings + ")";
|
||||
strings = `(\x00${strings})`;
|
||||
break;
|
||||
case "Single byte":
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
const regex = new XRegExp(strings + "{" + minLen + ",}", "ig");
|
||||
strings = `${strings}{${minLen},}`;
|
||||
|
||||
if (matchType.includes("Null-terminated")) {
|
||||
strings += "\x00";
|
||||
}
|
||||
|
||||
const regex = new XRegExp(strings, "ig");
|
||||
|
||||
return Extract._search(input, regex, null, displayTotal);
|
||||
},
|
||||
|
|
Loading…
Reference in a new issue