'Strings' now supports various different match types in ASCII and Unicode

2024-11-16 08:58:30 +01:00 · 2018-01-12 23:09:27 +00:00 · 2018-01-12 23:09:27 +00:00 · f2c073798b
commit f2c073798b
parent ec02b7deda
2 changed files with 64 additions and 19 deletions
--- a/src/core/config/OperationConfig.js
+++ b/src/core/config/OperationConfig.js
@ -2164,20 +2164,25 @@ const OperationConfig = {
        inputType: "string",
        outputType: "string",
        args: [
+            {
+                name: "Encoding",
+                type: "option",
+                value: Extract.ENCODING_LIST
+            },
            {
                name: "Minimum length",
                type: "number",
                value: Extract.MIN_STRING_LEN
            },
+            {
+                name: "Match",
+                type: "option",
+                value: Extract.STRING_MATCH_TYPE
+            },
            {
                name: "Display total",
                type: "boolean",
                value: Extract.DISPLAY_TOTAL
-            },
-            {
-                name: "Encoding",
-                type: "option",
-                value: Extract.ENCODING_LIST
            }
        ]
    },
--- a/src/core/operations/Extract.js
+++ b/src/core/operations/Extract.js
@ -51,17 +51,25 @@ const Extract = {
     * @constant
     * @default
     */
-    MIN_STRING_LEN: 3,
+    MIN_STRING_LEN: 4,
+    /**
+     * @constant
+     * @default
+     */
+    STRING_MATCH_TYPE: [
+        "[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)",
+        "[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)"
+    ],
+    /**
+     * @constant
+     * @default
+     */
+    ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"],
    /**
     * @constant
     * @default
     */
    DISPLAY_TOTAL: false,
-    /**
-     * @constant
-     * @default
-     */
-    ENCODING_LIST: ["All", "Single byte", "16-bit littleendian", "16-bit bigendian"],

    /**
     * Strings operation.
@ -71,27 +79,59 @@ const Extract = {
     * @returns {string}
     */
    runStrings: function(input, args) {
-        const minLen = args[0] || Extract.MIN_STRING_LEN,
-            displayTotal = args[1],
-            encoding = args[2];
-        let strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]";
+        const encoding = args[0],
+            minLen = args[1],
+            matchType = args[2],
+            displayTotal = args[3],
+            alphanumeric = "A-Z\\d",
+            punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@",
+            printable = "\x20-\x7e",
+            uniAlphanumeric = "\\pL\\pN",
+            uniPunctuation = "\\pP\\pZ",
+            uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP";

+        let strings = "";
+
+        switch (matchType) {
+            case "Alphanumeric + punctuation (A)":
+                strings = `[${alphanumeric + punctuation}]`;
+                break;
+            case "All printable chars (A)":
+            case "Null-terminated strings (A)":
+                strings = `[${printable}]`;
+                break;
+            case "Alphanumeric + punctuation (U)":
+                strings = `[${uniAlphanumeric + uniPunctuation}]`;
+                break;
+            case "All printable chars (U)":
+            case "Null-terminated strings (U)":
+                strings = `[${uniPrintable}]`;
+                break;
+        }
+
+        // UTF-16 support is hacked in by allowing null bytes on either side of the matched chars
        switch (encoding) {
            case "All":
-                strings = "(\x00?" + strings + "\x00?)";
+                strings = `(\x00?${strings}\x00?)`;
                break;
            case "16-bit littleendian":
-                strings = "(" + strings + "\x00)";
+                strings = `(${strings}\x00)`;
                break;
            case "16-bit bigendian":
-                strings = "(\x00" + strings + ")";
+                strings = `(\x00${strings})`;
                break;
            case "Single byte":
            default:
                break;
        }

-        const regex = new XRegExp(strings + "{" + minLen + ",}", "ig");
+        strings = `${strings}{${minLen},}`;
+
+        if (matchType.includes("Null-terminated")) {
+            strings += "\x00";
+        }
+
+        const regex = new XRegExp(strings, "ig");

        return Extract._search(input, regex, null, displayTotal);
    },