mirror of
https://github.com/gchq/CyberChef.git
synced 2024-11-16 08:58:30 +01:00
'Strings' now supports various different match types in ASCII and Unicode
This commit is contained in:
parent
ec02b7deda
commit
f2c073798b
2 changed files with 64 additions and 19 deletions
|
@ -2164,20 +2164,25 @@ const OperationConfig = {
|
||||||
inputType: "string",
|
inputType: "string",
|
||||||
outputType: "string",
|
outputType: "string",
|
||||||
args: [
|
args: [
|
||||||
|
{
|
||||||
|
name: "Encoding",
|
||||||
|
type: "option",
|
||||||
|
value: Extract.ENCODING_LIST
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "Minimum length",
|
name: "Minimum length",
|
||||||
type: "number",
|
type: "number",
|
||||||
value: Extract.MIN_STRING_LEN
|
value: Extract.MIN_STRING_LEN
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "Match",
|
||||||
|
type: "option",
|
||||||
|
value: Extract.STRING_MATCH_TYPE
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "Display total",
|
name: "Display total",
|
||||||
type: "boolean",
|
type: "boolean",
|
||||||
value: Extract.DISPLAY_TOTAL
|
value: Extract.DISPLAY_TOTAL
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Encoding",
|
|
||||||
type: "option",
|
|
||||||
value: Extract.ENCODING_LIST
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
|
@ -51,17 +51,25 @@ const Extract = {
|
||||||
* @constant
|
* @constant
|
||||||
* @default
|
* @default
|
||||||
*/
|
*/
|
||||||
MIN_STRING_LEN: 3,
|
MIN_STRING_LEN: 4,
|
||||||
|
/**
|
||||||
|
* @constant
|
||||||
|
* @default
|
||||||
|
*/
|
||||||
|
STRING_MATCH_TYPE: [
|
||||||
|
"[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)",
|
||||||
|
"[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)"
|
||||||
|
],
|
||||||
|
/**
|
||||||
|
* @constant
|
||||||
|
* @default
|
||||||
|
*/
|
||||||
|
ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"],
|
||||||
/**
|
/**
|
||||||
* @constant
|
* @constant
|
||||||
* @default
|
* @default
|
||||||
*/
|
*/
|
||||||
DISPLAY_TOTAL: false,
|
DISPLAY_TOTAL: false,
|
||||||
/**
|
|
||||||
* @constant
|
|
||||||
* @default
|
|
||||||
*/
|
|
||||||
ENCODING_LIST: ["All", "Single byte", "16-bit littleendian", "16-bit bigendian"],
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Strings operation.
|
* Strings operation.
|
||||||
|
@ -71,27 +79,59 @@ const Extract = {
|
||||||
* @returns {string}
|
* @returns {string}
|
||||||
*/
|
*/
|
||||||
runStrings: function(input, args) {
|
runStrings: function(input, args) {
|
||||||
const minLen = args[0] || Extract.MIN_STRING_LEN,
|
const encoding = args[0],
|
||||||
displayTotal = args[1],
|
minLen = args[1],
|
||||||
encoding = args[2];
|
matchType = args[2],
|
||||||
let strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]";
|
displayTotal = args[3],
|
||||||
|
alphanumeric = "A-Z\\d",
|
||||||
|
punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@",
|
||||||
|
printable = "\x20-\x7e",
|
||||||
|
uniAlphanumeric = "\\pL\\pN",
|
||||||
|
uniPunctuation = "\\pP\\pZ",
|
||||||
|
uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP";
|
||||||
|
|
||||||
|
let strings = "";
|
||||||
|
|
||||||
|
switch (matchType) {
|
||||||
|
case "Alphanumeric + punctuation (A)":
|
||||||
|
strings = `[${alphanumeric + punctuation}]`;
|
||||||
|
break;
|
||||||
|
case "All printable chars (A)":
|
||||||
|
case "Null-terminated strings (A)":
|
||||||
|
strings = `[${printable}]`;
|
||||||
|
break;
|
||||||
|
case "Alphanumeric + punctuation (U)":
|
||||||
|
strings = `[${uniAlphanumeric + uniPunctuation}]`;
|
||||||
|
break;
|
||||||
|
case "All printable chars (U)":
|
||||||
|
case "Null-terminated strings (U)":
|
||||||
|
strings = `[${uniPrintable}]`;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// UTF-16 support is hacked in by allowing null bytes on either side of the matched chars
|
||||||
switch (encoding) {
|
switch (encoding) {
|
||||||
case "All":
|
case "All":
|
||||||
strings = "(\x00?" + strings + "\x00?)";
|
strings = `(\x00?${strings}\x00?)`;
|
||||||
break;
|
break;
|
||||||
case "16-bit littleendian":
|
case "16-bit littleendian":
|
||||||
strings = "(" + strings + "\x00)";
|
strings = `(${strings}\x00)`;
|
||||||
break;
|
break;
|
||||||
case "16-bit bigendian":
|
case "16-bit bigendian":
|
||||||
strings = "(\x00" + strings + ")";
|
strings = `(\x00${strings})`;
|
||||||
break;
|
break;
|
||||||
case "Single byte":
|
case "Single byte":
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
const regex = new XRegExp(strings + "{" + minLen + ",}", "ig");
|
strings = `${strings}{${minLen},}`;
|
||||||
|
|
||||||
|
if (matchType.includes("Null-terminated")) {
|
||||||
|
strings += "\x00";
|
||||||
|
}
|
||||||
|
|
||||||
|
const regex = new XRegExp(strings, "ig");
|
||||||
|
|
||||||
return Extract._search(input, regex, null, displayTotal);
|
return Extract._search(input, regex, null, displayTotal);
|
||||||
},
|
},
|
||||||
|
|
Loading…
Reference in a new issue