From b8d3b33963680ac3f9902b0324f2456aa0873bf1 Mon Sep 17 00:00:00 2001
From: n1474335
Date: Tue, 15 May 2018 15:03:41 +0000
Subject: [PATCH] ESM: Ported CharEnc operations

---
 src/core/lib/ChrEnc.mjs            | 58 ++++++++++++++++++++++++++++++
 src/core/operations/DecodeText.mjs | 55 ++++++++++++++++++++++++++++
 src/core/operations/EncodeText.mjs | 58 ++++++++++++++++++++++++++++++
 3 files changed, 171 insertions(+)
 create mode 100644 src/core/lib/ChrEnc.mjs
 create mode 100644 src/core/operations/DecodeText.mjs
 create mode 100644 src/core/operations/EncodeText.mjs

diff --git a/src/core/lib/ChrEnc.mjs b/src/core/lib/ChrEnc.mjs
new file mode 100644
index 00000000..02b2e9a2
--- /dev/null
+++ b/src/core/lib/ChrEnc.mjs
@@ -0,0 +1,58 @@
+/**
+ * Character encoding resources.
+ *
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2016
+ * @license Apache-2.0
+ */
+
+/**
+ * Character encoding format mappings.
+ */
+export const IO_FORMAT = {
+    "UTF-8 (65001)": 65001,
+    "UTF-7 (65000)": 65000,
+    "UTF16LE (1200)": 1200,
+    "UTF16BE (1201)": 1201,
+    "UTF16 (1201)": 1201,
+    "IBM EBCDIC International (500)": 500,
+    "IBM EBCDIC US-Canada (37)": 37,
+    "Windows-874 Thai (874)": 874,
+    "Japanese Shift-JIS (932)": 932,
+    "Simplified Chinese GBK (936)": 936,
+    "Korean (949)": 949,
+    "Traditional Chinese Big5 (950)": 950,
+    "Windows-1250 Central European (1250)": 1250,
+    "Windows-1251 Cyrillic (1251)": 1251,
+    "Windows-1252 Latin (1252)": 1252,
+    "Windows-1253 Greek (1253)": 1253,
+    "Windows-1254 Turkish (1254)": 1254,
+    "Windows-1255 Hebrew (1255)": 1255,
+    "Windows-1256 Arabic (1256)": 1256,
+    "Windows-1257 Baltic (1257)": 1257,
+    "Windows-1258 Vietnam (1258)": 1258,
+    "US-ASCII (20127)": 20127,
+    "Simplified Chinese GB2312 (20936)": 20936,
+    "KOI8-R Russian Cyrillic (20866)": 20866,
+    "KOI8-U Ukrainian Cyrillic (21866)": 21866,
+    "ISO-8859-1 Latin 1 Western European (28591)": 28591,
+    "ISO-8859-2 Latin 2 Central European (28592)": 28592,
+    "ISO-8859-3 Latin 3 South European (28593)": 28593,
+    "ISO-8859-4 Latin 4 North European (28594)": 28594,
+    "ISO-8859-5 Latin/Cyrillic (28595)": 28595,
+    "ISO-8859-6 Latin/Arabic (28596)": 28596,
+    "ISO-8859-7 Latin/Greek (28597)": 28597,
+    "ISO-8859-8 Latin/Hebrew (28598)": 28598,
+    "ISO-8859-9 Latin 5 Turkish (28599)": 28599,
+    "ISO-8859-10 Latin 6 Nordic (28600)": 28600,
+    "ISO-8859-11 Latin/Thai (28601)": 28601,
+    "ISO-8859-13 Latin 7 Baltic Rim (28603)": 28603,
+    "ISO-8859-14 Latin 8 Celtic (28604)": 28604,
+    "ISO-8859-15 Latin 9 (28605)": 28605,
+    "ISO-8859-16 Latin 10 (28606)": 28606,
+    "ISO-2022 JIS Japanese (50222)": 50222,
+    "EUC Japanese (51932)": 51932,
+    "EUC Korean (51949)": 51949,
+    "Simplified Chinese GB18030 (54936)": 54936,
+};
+
diff --git a/src/core/operations/DecodeText.mjs b/src/core/operations/DecodeText.mjs
new file mode 100644
index 00000000..9e51199f
--- /dev/null
+++ b/src/core/operations/DecodeText.mjs
@@ -0,0 +1,55 @@
+/**
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2016
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation";
+import cptable from "../vendor/js-codepage/cptable.js";
+import {IO_FORMAT} from "../lib/ChrEnc";
+
+/**
+ * Decode text operation
+ */
+class DecodeText extends Operation {
+
+    /**
+     * DecodeText constructor
+     */
+    constructor() {
+        super();
+
+        this.name = "Decode text";
+        this.module = "CharEnc";
+        this.description = [
+            "Decodes text from the chosen character encoding.",
+            "<br><br>",
+            "Supported charsets are:",
+            "<ul>",
+            Object.keys(IO_FORMAT).map(e => `<li>${e}</li>`).join("\n"),
+            "</ul>",
+        ].join("\n");
+        this.inputType = "byteArray";
+        this.outputType = "string";
+        this.args = [
+            {
+                "name": "Encoding",
+                "type": "option",
+                "value": Object.keys(IO_FORMAT)
+            }
+        ];
+    }
+
+    /**
+     * @param {byteArray} input
+     * @param {Object[]} args
+     * @returns {string}
+     */
+    run(input, args) {
+        const format = IO_FORMAT[args[0]];
+        return cptable.utils.decode(format, input);
+    }
+
+}
+
+export default DecodeText;
diff --git a/src/core/operations/EncodeText.mjs b/src/core/operations/EncodeText.mjs
new file mode 100644
index 00000000..6786be56
--- /dev/null
+++ b/src/core/operations/EncodeText.mjs
@@ -0,0 +1,58 @@
+/**
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2016
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation";
+import cptable from "../vendor/js-codepage/cptable.js";
+import {IO_FORMAT} from "../lib/ChrEnc";
+
+/**
+ * Encode text operation
+ */
+class EncodeText extends Operation {
+
+    /**
+     * EncodeText constructor
+     */
+    constructor() {
+        super();
+
+        this.name = "Encode text";
+        this.module = "CharEnc";
+        this.description = [
+            "Encodes text into the chosen character encoding.",
+            "<br><br>",
+            "Supported charsets are:",
+            "<ul>",
+            Object.keys(IO_FORMAT).map(e => `<li>${e}</li>`).join("\n"),
+            "</ul>",
+        ].join("\n");
+        this.inputType = "string";
+        this.outputType = "byteArray";
+        this.args = [
+            {
+                "name": "Encoding",
+                "type": "option",
+                "value": Object.keys(IO_FORMAT)
+            }
+        ];
+    }
+
+    /**
+     * @param {string} input
+     * @param {Object[]} args
+     * @returns {byteArray}
+     */
+    run(input, args) {
+        const format = IO_FORMAT[args[0]];
+        let encoded = cptable.utils.encode(format, input);
+        encoded = Array.from(encoded);
+        return encoded;
+    }
+
+}
+
+
+export default EncodeText;