Split "Text encoding" op into two ops

This commit splits "Text encoding" into two operations:

+ Encode text `string -> byteArray`
+ Decode text `byteArray -> string`

Base64 and Hex support are removed from "Encode text" and "Decode text" as they have their own operations.

Encode and decode operations now have support for the following encodings:

+ IBM EBCDIC US-Canada
+ IBM EBCDIC International
+ Windows-874 Thai
+ Japanese Shift-JIS
+ Simplified Chinese GBK
+ Korean
+ Traditional Chinese Big5
+ UTF-16, little endian
+ UTF-16, big endian
+ Windows-1250 Central European
+ Windows-1251 Cyrillic
+ Windows-1252 Latin
+ Windows-1253 Greek
+ Windows-1254 Turkish
+ Windows-1255 Hebrew
+ Windows-1256 Arabic
+ Windows-1257 Baltic
+ Windows-1258 Vietnam
+ US-ASCII
+ Russian Cyrillic KOI8-R
+ Simplified Chinese GB2312
+ KOI8-U Ukrainian Cyrillic
+ ISO-8859-1 Latin 1 (Western European)
+ ISO-8859-2 Latin 2 (Central European)
+ ISO-8859-3 Latin 3
+ ISO-8859-4 Baltic
+ ISO-8859-5 Cyrillic
+ ISO-8859-6 Arabic
+ ISO-8859-7 Greek
+ ISO-8859-8 Hebrew
+ ISO-8859-9 Turkish
+ ISO-8859-10 Latin 6
+ ISO-8859-11 Latin (Thai)
+ ISO-8859-13 Latin 7 (Estonian)
+ ISO-8859-14 Latin 8 (Celtic)
+ ISO-8859-15 Latin 9
+ ISO-8859-16 Latin 10
+ ISO-2022 JIS Japanese
+ EUC Japanese
+ EUC Korean
+ Simplified Chinese GB18030
+ UTF-7
+ UTF-8
2017-05-17 11:17:11 -04:00 · 2017-05-17 11:17:11 -04:00 · 2b7ba594fc
parent 3c15bd9e29
commit 2b7ba594fc
5 changed files with 2119 additions and 121 deletions
--- a/src/core/config/Categories.js
+++ b/src/core/config/Categories.js
@ -61,7 +61,8 @@ const Categories = [
            "Hex to PEM",
            "Parse ASN.1 hex string",
            "Change IP format",
-            "Text encoding",
+            "Encode text",
            "Decode text",
            "Swap endianness",
        ]
    },
@ -143,7 +144,8 @@ const Categories = [
    {
        name: "Language",
        ops: [
-            "Text encoding",
+            "Encode text",
            "Decode text",
            "Unescape Unicode Characters",
        ]
    },
--- a/src/core/config/OperationConfig.js
+++ b/src/core/config/OperationConfig.js
@ -872,21 +872,43 @@ const OperationConfig = {
            }
        ]
    },
-    "Text encoding": {
+    "Encode text": {
-        description: "Translates the data between different character encodings.<br><br>Supported charsets are:<ul><li>UTF8</li><li>UTF16</li><li>UTF16LE (little-endian)</li><li>UTF16BE (big-endian)</li><li>Hex</li><li>Base64</li><li>Latin1 (ISO-8859-1)</li><li>Windows-1251</li></ul>",
+        description: [
-        run: CharEnc.run,
+            "Encodes text into the chosen character encoding.",
            "<br><br>",
            "Supported charsets are:",
            "<ul>",
            Object.keys(CharEnc.IO_FORMAT).map(e => `<li>${e}</li>`).join("<br>"),
            "</ul>",
        ].join("\n"),
        run: CharEnc.runEncode,
        inputType: "string",
        outputType: "byteArray",
        args: [
            {
                name: "Encoding",
                type: "option",
                value: Object.keys(CharEnc.IO_FORMAT),
            },
        ]
    },
    "Decode text": {
        description: [
            "Decodes text from the chosen character encoding.",
            "<br><br>",
            "Supported charsets are:",
            "<ul>",
            Object.keys(CharEnc.IO_FORMAT).map(e => `<li>${e}</li>`).join("<br>"),
            "</ul>",
        ].join("\n"),
        run: CharEnc.runDecode,
        inputType: "byteArray",
        outputType: "string",
        args: [
            {
-                name: "Input type",
+                name: "Encoding",
                type: "option",
-                value: CharEnc.IO_FORMAT
+                value: Object.keys(CharEnc.IO_FORMAT),
            },
            {
                name: "Output type",
                type: "option",
                value: CharEnc.IO_FORMAT
            },
        ]
    },
@ -3249,44 +3271,6 @@ const OperationConfig = {
            },
        ]
    },
    "To EBCDIC": {
        description: [
            "This operation converts ASCII text to EBCDIC.",
            "<br>",
            "You can choose between a few versions of EBCDIC.",
            "<br>",
            "EBCDIC is a proprietary encoding pre-dating ASCII that originated at IBM.",
        ].join("\n"),
        run: CharEnc.runToEBCDIC,
        inputType: "string",
        outputType: "byteArray",
        args: [
            {
                name: "EBCDIC version",
                type: "option",
                value: Object.keys(CharEnc.EBCDIC_CODEPAGES_MAPPING),
            },
        ]
    },
    "From EBCDIC": {
        description: [
            "This operation converts ASCII text from EBCDIC.",
            "<br>",
            "You can choose between a few versions of EBCDIC.",
            "<br>",
            "EBCDIC is a proprietary encoding pre-dating ASCII that originated at IBM.",
        ].join("\n"),
        run: CharEnc.runFromEBCDIC,
        inputType: "byteArray",
        outputType: "string",
        args: [
            {
                name: "EBCDIC version",
                type: "option",
                value: Object.keys(CharEnc.EBCDIC_CODEPAGES_MAPPING),
            },
        ]
    },
 };
 export default OperationConfig;
--- a/src/core/lib/codepage.js
+++ b/src/core/lib/codepage.js
--- a/src/core/operations/CharEnc.js
+++ b/src/core/operations/CharEnc.js
@ -18,80 +18,79 @@ const CharEnc = {
     * @constant
     * @default
     */
-    IO_FORMAT: ["UTF8", "UTF16", "UTF16LE", "UTF16BE", "Latin1", "Windows-1251", "Hex", "Base64"],
+    IO_FORMAT: {
-
+        "UTF-8": 65001,
-    /**
+        "UTF-7": 65000,
-     * Text encoding operation.
+        "UTF16LE": 1200,
-     *
+        "UTF16BE": 1201,
-     * @param {string} input
+        "UTF16": 1201,
     * @param {Object[]} args
     * @returns {string}
     */
    run: function(input, args) {
        var inputFormat = args[0],
            outputFormat = args[1];
        if (inputFormat === "Windows-1251") {
            input = Utils.win1251ToUnicode(input);
            input = CryptoJS.enc.Utf8.parse(input);
        } else {
            input = Utils.format[inputFormat].parse(input);
        }
        if (outputFormat === "Windows-1251") {
            input = CryptoJS.enc.Utf8.stringify(input);
            return Utils.unicodeToWin1251(input);
        } else {
            return Utils.format[outputFormat].stringify(input);
        }
    },
    /**
     *
     * @author tlwr [toby@toby.codes]
     *
     * @constant
     * @default
     */
    EBCDIC_CODEPAGES_MAPPING: {
        "IBM EBCDIC International": 500,
        "IBM EBCDIC US-Canada": 37,
        "Windows-874 Thai": 874,
        "Japanese Shift-JIS": 932,
        "Simplified Chinese GBK": 936,
        "Korean": 949,
        "Traditional Chinese Big5": 950,
        "Windows-1250 Central European": 1250,
        "Windows-1251 Cyrillic": 1251,
        "Windows-1252 Latin": 1252,
        "Windows-1253 Greek": 1253,
        "Windows-1254 Turkish": 1254,
        "Windows-1255 Hebrew": 1255,
        "Windows-1256 Arabic": 1256,
        "Windows-1257 Baltic": 1257,
        "Windows-1258 Vietnam": 1258,
        "US-ASCII": 20127,
        "Russian Cyrillic KOI8-R": 20866,
        "Simplified Chinese GB2312": 20936,
        "KOI8-U Ukrainian Cyrillic": 21866,
        "ISO-8859-1 Latin 1 (Western European)": 28591,
        "ISO-8859-2 Latin 2 (Central European)": 28592,
        "ISO-8859-3 Latin 3": 28593,
        "ISO-8859-4 Baltic": 28594,
        "ISO-8859-5 Cyrillic": 28595,
        "ISO-8859-6 Arabic": 28596,
        "ISO-8859-7 Greek": 28597,
        "ISO-8859-8 Hebrew": 28598,
        "ISO-8859-9 Turkish": 28599,
        "ISO-8859-10 Latin 6": 28600,
        "ISO-8859-11 Latin (Thai)": 28601,
        "ISO-8859-13 Latin 7 (Estonian)": 28603,
        "ISO-8859-14 Latin 8 (Celtic)": 28604,
        "ISO-8859-15 Latin 9": 28605,
        "ISO-8859-16 Latin 10": 28606,
        "ISO-2022 JIS Japanese": 50222,
        "EUC Japanese": 51932,
        "EUC Korean": 51949,
        "Simplified Chinese GB18030": 54936,
    },
    /**
-     * To EBCDIC operation.
+     * Encode text operation.
     *
     * @author tlwr [toby@toby.codes]
     *
     * @param {string} input
     * @param {Object[]} args
     * @returns {byteArray}
     */
-    runToEBCDIC: function(input, args) {
+    runEncode: function(input, args) {
-        let pageNum = CharEnc.EBCDIC_CODEPAGES_MAPPING[args[0]];
+        let format = CharEnc.IO_FORMAT[args[0]];
-
+        let encoded = cptable.utils.encode(format, input);
-        let output = cptable.utils.encode(pageNum, input);
+        encoded = Array.from(encoded);
-
+        return encoded;
        return Array.from(output);
    },
    /**
-     * From EBCDIC operation.
+     * Decode text operation.
     *
     * @author tlwr [toby@toby.codes]
     *
     * @param {byteArray} input
     * @param {Object[]} args
     * @returns {string}
     */
-    runFromEBCDIC: function(input, args) {
+    runDecode: function(input, args) {
-        let pageNum = CharEnc.EBCDIC_CODEPAGES_MAPPING[args[0]];
+        let format = CharEnc.IO_FORMAT[args[0]];
-
+        let decoded = cptable.utils.decode(format, input);
-        let output = cptable.utils.decode(pageNum, input);
+        return decoded;
        return output;
    },
 };
 export default CharEnc;
--- a/test/tests/operations/CharEnc.js
+++ b/test/tests/operations/CharEnc.js
@ -9,42 +9,42 @@ import TestRegister from "../../TestRegister.js";
 TestRegister.addTests([
    {
-        name: "From EBCDIC: nothing",
+        name: "Encode text, Decode text: nothing",
        input: "",
        expectedOutput: "",
        recipeConfig: [
            {
-                "op": "From Hex",
+                "op": "Encode text",
-                "args": ["Space"]
+                "args": ["UTF-8"]
            },
            {
-                "op": "From EBCDIC",
+                "op": "Decode text",
-                "args": ["IBM EBCDIC International"]
+                "args": ["UTF-8"]
            },
        ],
    },
    {
-        name: "From EBCDIC: hello",
+        name: "Encode text, Decode text: hello",
-        input: "88 85 93 93 96",
+        input: "hello",
        expectedOutput: "hello",
        recipeConfig: [
            {
-                "op": "From Hex",
+                "op": "Encode text",
-                "args": ["Space"]
+                "args": ["UTF-8"]
            },
            {
-                "op": "From EBCDIC",
+                "op": "Decode text",
-                "args": ["IBM EBCDIC International"]
+                "args": ["UTF-8"]
            },
        ],
    },
    {
-        name: "To EBCDIC: hello",
+        name: "Encode text (EBCDIC): hello",
        input: "hello",
        expectedOutput: "88 85 93 93 96",
        recipeConfig: [
            {
-                "op": "To EBCDIC",
+                "op": "Encode text",
                "args": ["IBM EBCDIC International"]
            },
            {
@ -53,4 +53,19 @@ TestRegister.addTests([
            },
        ],
    },
    {
        name: "Decode text (EBCDIC): 88 85 93 93 96",
        input: "88 85 93 93 96",
        expectedOutput: "hello",
        recipeConfig: [
            {
                "op": "From Hex",
                "args": ["Space"]
            },
            {
                "op": "Decode text",
                "args": ["IBM EBCDIC International"]
            },
        ],
    },
 ]);