'Strings' now supports various different match types in ASCII and Unicode

This commit is contained in:
n1474335 2018-01-12 23:09:27 +00:00
parent ec02b7deda
commit f2c073798b
2 changed files with 64 additions and 19 deletions

View file

@ -2164,20 +2164,25 @@ const OperationConfig = {
inputType: "string", inputType: "string",
outputType: "string", outputType: "string",
args: [ args: [
{
name: "Encoding",
type: "option",
value: Extract.ENCODING_LIST
},
{ {
name: "Minimum length", name: "Minimum length",
type: "number", type: "number",
value: Extract.MIN_STRING_LEN value: Extract.MIN_STRING_LEN
}, },
{
name: "Match",
type: "option",
value: Extract.STRING_MATCH_TYPE
},
{ {
name: "Display total", name: "Display total",
type: "boolean", type: "boolean",
value: Extract.DISPLAY_TOTAL value: Extract.DISPLAY_TOTAL
},
{
name: "Encoding",
type: "option",
value: Extract.ENCODING_LIST
} }
] ]
}, },

View file

@ -51,17 +51,25 @@ const Extract = {
* @constant * @constant
* @default * @default
*/ */
MIN_STRING_LEN: 3, MIN_STRING_LEN: 4,
/**
* @constant
* @default
*/
STRING_MATCH_TYPE: [
"[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)",
"[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)"
],
/**
* @constant
* @default
*/
ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"],
/** /**
* @constant * @constant
* @default * @default
*/ */
DISPLAY_TOTAL: false, DISPLAY_TOTAL: false,
/**
* @constant
* @default
*/
ENCODING_LIST: ["All", "Single byte", "16-bit littleendian", "16-bit bigendian"],
/** /**
* Strings operation. * Strings operation.
@ -71,27 +79,59 @@ const Extract = {
* @returns {string} * @returns {string}
*/ */
runStrings: function(input, args) { runStrings: function(input, args) {
const minLen = args[0] || Extract.MIN_STRING_LEN, const encoding = args[0],
displayTotal = args[1], minLen = args[1],
encoding = args[2]; matchType = args[2],
let strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]"; displayTotal = args[3],
alphanumeric = "A-Z\\d",
punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@",
printable = "\x20-\x7e",
uniAlphanumeric = "\\pL\\pN",
uniPunctuation = "\\pP\\pZ",
uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP";
let strings = "";
switch (matchType) {
case "Alphanumeric + punctuation (A)":
strings = `[${alphanumeric + punctuation}]`;
break;
case "All printable chars (A)":
case "Null-terminated strings (A)":
strings = `[${printable}]`;
break;
case "Alphanumeric + punctuation (U)":
strings = `[${uniAlphanumeric + uniPunctuation}]`;
break;
case "All printable chars (U)":
case "Null-terminated strings (U)":
strings = `[${uniPrintable}]`;
break;
}
// UTF-16 support is hacked in by allowing null bytes on either side of the matched chars
switch (encoding) { switch (encoding) {
case "All": case "All":
strings = "(\x00?" + strings + "\x00?)"; strings = `(\x00?${strings}\x00?)`;
break; break;
case "16-bit littleendian": case "16-bit littleendian":
strings = "(" + strings + "\x00)"; strings = `(${strings}\x00)`;
break; break;
case "16-bit bigendian": case "16-bit bigendian":
strings = "(\x00" + strings + ")"; strings = `(\x00${strings})`;
break; break;
case "Single byte": case "Single byte":
default: default:
break; break;
} }
const regex = new XRegExp(strings + "{" + minLen + ",}", "ig"); strings = `${strings}{${minLen},}`;
if (matchType.includes("Null-terminated")) {
strings += "\x00";
}
const regex = new XRegExp(strings, "ig");
return Extract._search(input, regex, null, displayTotal); return Extract._search(input, regex, null, displayTotal);
}, },