'Strings' now supports various different match types in ASCII and Unicode

This commit is contained in:
n1474335 2018-01-12 23:09:27 +00:00
parent ec02b7deda
commit f2c073798b
2 changed files with 64 additions and 19 deletions

View file

@ -2164,20 +2164,25 @@ const OperationConfig = {
inputType: "string",
outputType: "string",
args: [
{
name: "Encoding",
type: "option",
value: Extract.ENCODING_LIST
},
{
name: "Minimum length",
type: "number",
value: Extract.MIN_STRING_LEN
},
{
name: "Match",
type: "option",
value: Extract.STRING_MATCH_TYPE
},
{
name: "Display total",
type: "boolean",
value: Extract.DISPLAY_TOTAL
},
{
name: "Encoding",
type: "option",
value: Extract.ENCODING_LIST
}
]
},

View file

@ -51,17 +51,25 @@ const Extract = {
* @constant
* @default
*/
MIN_STRING_LEN: 3,
MIN_STRING_LEN: 4,
/**
* @constant
* @default
*/
STRING_MATCH_TYPE: [
"[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)",
"[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)"
],
/**
* @constant
* @default
*/
ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"],
/**
* @constant
* @default
*/
DISPLAY_TOTAL: false,
/**
* @constant
* @default
*/
ENCODING_LIST: ["All", "Single byte", "16-bit littleendian", "16-bit bigendian"],
/**
* Strings operation.
@ -71,27 +79,59 @@ const Extract = {
* @returns {string}
*/
runStrings: function(input, args) {
const minLen = args[0] || Extract.MIN_STRING_LEN,
displayTotal = args[1],
encoding = args[2];
let strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]";
const encoding = args[0],
minLen = args[1],
matchType = args[2],
displayTotal = args[3],
alphanumeric = "A-Z\\d",
punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@",
printable = "\x20-\x7e",
uniAlphanumeric = "\\pL\\pN",
uniPunctuation = "\\pP\\pZ",
uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP";
let strings = "";
switch (matchType) {
case "Alphanumeric + punctuation (A)":
strings = `[${alphanumeric + punctuation}]`;
break;
case "All printable chars (A)":
case "Null-terminated strings (A)":
strings = `[${printable}]`;
break;
case "Alphanumeric + punctuation (U)":
strings = `[${uniAlphanumeric + uniPunctuation}]`;
break;
case "All printable chars (U)":
case "Null-terminated strings (U)":
strings = `[${uniPrintable}]`;
break;
}
// UTF-16 support is hacked in by allowing null bytes on either side of the matched chars
switch (encoding) {
case "All":
strings = "(\x00?" + strings + "\x00?)";
strings = `(\x00?${strings}\x00?)`;
break;
case "16-bit littleendian":
strings = "(" + strings + "\x00)";
strings = `(${strings}\x00)`;
break;
case "16-bit bigendian":
strings = "(\x00" + strings + ")";
strings = `(\x00${strings})`;
break;
case "Single byte":
default:
break;
}
const regex = new XRegExp(strings + "{" + minLen + ",}", "ig");
strings = `${strings}{${minLen},}`;
if (matchType.includes("Null-terminated")) {
strings += "\x00";
}
const regex = new XRegExp(strings, "ig");
return Extract._search(input, regex, null, displayTotal);
},