Split "Text encoding" op into two ops

This commit splits "Text encoding" into two operations:
+ Encode text `string -> byteArray`
+ Decode text `byteArray -> string`

Base64 and Hex support are removed from "Encode text" and "Decode text" as
they have their own operations.

Encode and decode operations now have support for the following
encodings:
+ IBM EBCDIC US-Canada
+ IBM EBCDIC International
+ Windows-874 Thai
+ Japanese Shift-JIS
+ Simplified Chinese GBK
+ Korean
+ Traditional Chinese Big5
+ UTF-16, little endian
+ UTF-16, big endian
+ Windows-1250 Central European
+ Windows-1251 Cyrillic
+ Windows-1252 Latin
+ Windows-1253 Greek
+ Windows-1254 Turkish
+ Windows-1255 Hebrew
+ Windows-1256 Arabic
+ Windows-1257 Baltic
+ Windows-1258 Vietnam
+ US-ASCII
+ Russian Cyrillic KOI8-R
+ Simplified Chinese GB2312
+ KOI8-U Ukrainian Cyrillic
+ ISO-8859-1 Latin 1 (Western European)
+ ISO-8859-2 Latin 2 (Central European)
+ ISO-8859-3 Latin 3
+ ISO-8859-4 Baltic
+ ISO-8859-5 Cyrillic
+ ISO-8859-6 Arabic
+ ISO-8859-7 Greek
+ ISO-8859-8 Hebrew
+ ISO-8859-9 Turkish
+ ISO-8859-10 Latin 6
+ ISO-8859-11 Latin (Thai)
+ ISO-8859-13 Latin 7 (Estonian)
+ ISO-8859-14 Latin 8 (Celtic)
+ ISO-8859-15 Latin 9
+ ISO-8859-16 Latin 10
+ ISO-2022 JIS Japanese
+ EUC Japanese
+ EUC Korean
+ Simplified Chinese GB18030
+ UTF-7
+ UTF-8
This commit is contained in:
toby 2017-05-17 11:17:11 -04:00
parent 3c15bd9e29
commit 2b7ba594fc
5 changed files with 2119 additions and 121 deletions

View File

@ -61,7 +61,8 @@ const Categories = [
"Hex to PEM", "Hex to PEM",
"Parse ASN.1 hex string", "Parse ASN.1 hex string",
"Change IP format", "Change IP format",
"Text encoding", "Encode text",
"Decode text",
"Swap endianness", "Swap endianness",
] ]
}, },
@ -143,7 +144,8 @@ const Categories = [
{ {
name: "Language", name: "Language",
ops: [ ops: [
"Text encoding", "Encode text",
"Decode text",
"Unescape Unicode Characters", "Unescape Unicode Characters",
] ]
}, },

View File

@ -872,21 +872,43 @@ const OperationConfig = {
} }
] ]
}, },
"Text encoding": { "Encode text": {
description: "Translates the data between different character encodings.<br><br>Supported charsets are:<ul><li>UTF8</li><li>UTF16</li><li>UTF16LE (little-endian)</li><li>UTF16BE (big-endian)</li><li>Hex</li><li>Base64</li><li>Latin1 (ISO-8859-1)</li><li>Windows-1251</li></ul>", description: [
run: CharEnc.run, "Encodes text into the chosen character encoding.",
"<br><br>",
"Supported charsets are:",
"<ul>",
Object.keys(CharEnc.IO_FORMAT).map(e => `<li>${e}</li>`).join("<br>"),
"</ul>",
].join("\n"),
run: CharEnc.runEncode,
inputType: "string", inputType: "string",
outputType: "byteArray",
args: [
{
name: "Encoding",
type: "option",
value: Object.keys(CharEnc.IO_FORMAT),
},
]
},
"Decode text": {
description: [
"Decodes text from the chosen character encoding.",
"<br><br>",
"Supported charsets are:",
"<ul>",
Object.keys(CharEnc.IO_FORMAT).map(e => `<li>${e}</li>`).join("<br>"),
"</ul>",
].join("\n"),
run: CharEnc.runDecode,
inputType: "byteArray",
outputType: "string", outputType: "string",
args: [ args: [
{ {
name: "Input type", name: "Encoding",
type: "option", type: "option",
value: CharEnc.IO_FORMAT value: Object.keys(CharEnc.IO_FORMAT),
},
{
name: "Output type",
type: "option",
value: CharEnc.IO_FORMAT
}, },
] ]
}, },
@ -3249,44 +3271,6 @@ const OperationConfig = {
}, },
] ]
}, },
"To EBCDIC": {
description: [
"This operation converts ASCII text to EBCDIC.",
"<br>",
"You can choose between a few versions of EBCDIC.",
"<br>",
"EBCDIC is a proprietary encoding pre-dating ASCII that originated at IBM.",
].join("\n"),
run: CharEnc.runToEBCDIC,
inputType: "string",
outputType: "byteArray",
args: [
{
name: "EBCDIC version",
type: "option",
value: Object.keys(CharEnc.EBCDIC_CODEPAGES_MAPPING),
},
]
},
"From EBCDIC": {
description: [
"This operation converts ASCII text from EBCDIC.",
"<br>",
"You can choose between a few versions of EBCDIC.",
"<br>",
"EBCDIC is a proprietary encoding pre-dating ASCII that originated at IBM.",
].join("\n"),
run: CharEnc.runFromEBCDIC,
inputType: "byteArray",
outputType: "string",
args: [
{
name: "EBCDIC version",
type: "option",
value: Object.keys(CharEnc.EBCDIC_CODEPAGES_MAPPING),
},
]
},
}; };
export default OperationConfig; export default OperationConfig;

File diff suppressed because it is too large Load Diff

View File

@ -18,80 +18,79 @@ const CharEnc = {
* @constant * @constant
* @default * @default
*/ */
IO_FORMAT: ["UTF8", "UTF16", "UTF16LE", "UTF16BE", "Latin1", "Windows-1251", "Hex", "Base64"], IO_FORMAT: {
"UTF-8": 65001,
/** "UTF-7": 65000,
* Text encoding operation. "UTF16LE": 1200,
* "UTF16BE": 1201,
* @param {string} input "UTF16": 1201,
* @param {Object[]} args
* @returns {string}
*/
run: function(input, args) {
var inputFormat = args[0],
outputFormat = args[1];
if (inputFormat === "Windows-1251") {
input = Utils.win1251ToUnicode(input);
input = CryptoJS.enc.Utf8.parse(input);
} else {
input = Utils.format[inputFormat].parse(input);
}
if (outputFormat === "Windows-1251") {
input = CryptoJS.enc.Utf8.stringify(input);
return Utils.unicodeToWin1251(input);
} else {
return Utils.format[outputFormat].stringify(input);
}
},
/**
*
* @author tlwr [toby@toby.codes]
*
* @constant
* @default
*/
EBCDIC_CODEPAGES_MAPPING: {
"IBM EBCDIC International": 500, "IBM EBCDIC International": 500,
"IBM EBCDIC US-Canada": 37, "IBM EBCDIC US-Canada": 37,
"Windows-874 Thai": 874,
"Japanese Shift-JIS": 932,
"Simplified Chinese GBK": 936,
"Korean": 949,
"Traditional Chinese Big5": 950,
"Windows-1250 Central European": 1250,
"Windows-1251 Cyrillic": 1251,
"Windows-1252 Latin": 1252,
"Windows-1253 Greek": 1253,
"Windows-1254 Turkish": 1254,
"Windows-1255 Hebrew": 1255,
"Windows-1256 Arabic": 1256,
"Windows-1257 Baltic": 1257,
"Windows-1258 Vietnam": 1258,
"US-ASCII": 20127,
"Russian Cyrillic KOI8-R": 20866,
"Simplified Chinese GB2312": 20936,
"KOI8-U Ukrainian Cyrillic": 21866,
"ISO-8859-1 Latin 1 (Western European)": 28591,
"ISO-8859-2 Latin 2 (Central European)": 28592,
"ISO-8859-3 Latin 3": 28593,
"ISO-8859-4 Baltic": 28594,
"ISO-8859-5 Cyrillic": 28595,
"ISO-8859-6 Arabic": 28596,
"ISO-8859-7 Greek": 28597,
"ISO-8859-8 Hebrew": 28598,
"ISO-8859-9 Turkish": 28599,
"ISO-8859-10 Latin 6": 28600,
"ISO-8859-11 Latin (Thai)": 28601,
"ISO-8859-13 Latin 7 (Estonian)": 28603,
"ISO-8859-14 Latin 8 (Celtic)": 28604,
"ISO-8859-15 Latin 9": 28605,
"ISO-8859-16 Latin 10": 28606,
"ISO-2022 JIS Japanese": 50222,
"EUC Japanese": 51932,
"EUC Korean": 51949,
"Simplified Chinese GB18030": 54936,
}, },
/** /**
* To EBCDIC operation. * Encode text operation.
*
* @author tlwr [toby@toby.codes]
* *
* @param {string} input * @param {string} input
* @param {Object[]} args * @param {Object[]} args
* @returns {byteArray} * @returns {byteArray}
*/ */
runToEBCDIC: function(input, args) { runEncode: function(input, args) {
let pageNum = CharEnc.EBCDIC_CODEPAGES_MAPPING[args[0]]; let format = CharEnc.IO_FORMAT[args[0]];
let encoded = cptable.utils.encode(format, input);
let output = cptable.utils.encode(pageNum, input); encoded = Array.from(encoded);
return encoded;
return Array.from(output);
}, },
/** /**
* From EBCDIC operation. * Decode text operation.
*
* @author tlwr [toby@toby.codes]
* *
* @param {byteArray} input * @param {byteArray} input
* @param {Object[]} args * @param {Object[]} args
* @returns {string} * @returns {string}
*/ */
runFromEBCDIC: function(input, args) { runDecode: function(input, args) {
let pageNum = CharEnc.EBCDIC_CODEPAGES_MAPPING[args[0]]; let format = CharEnc.IO_FORMAT[args[0]];
let decoded = cptable.utils.decode(format, input);
let output = cptable.utils.decode(pageNum, input); return decoded;
return output;
}, },
}; };
export default CharEnc; export default CharEnc;

View File

@ -9,42 +9,42 @@ import TestRegister from "../../TestRegister.js";
TestRegister.addTests([ TestRegister.addTests([
{ {
name: "From EBCDIC: nothing", name: "Encode text, Decode text: nothing",
input: "", input: "",
expectedOutput: "", expectedOutput: "",
recipeConfig: [ recipeConfig: [
{ {
"op": "From Hex", "op": "Encode text",
"args": ["Space"] "args": ["UTF-8"]
}, },
{ {
"op": "From EBCDIC", "op": "Decode text",
"args": ["IBM EBCDIC International"] "args": ["UTF-8"]
}, },
], ],
}, },
{ {
name: "From EBCDIC: hello", name: "Encode text, Decode text: hello",
input: "88 85 93 93 96", input: "hello",
expectedOutput: "hello", expectedOutput: "hello",
recipeConfig: [ recipeConfig: [
{ {
"op": "From Hex", "op": "Encode text",
"args": ["Space"] "args": ["UTF-8"]
}, },
{ {
"op": "From EBCDIC", "op": "Decode text",
"args": ["IBM EBCDIC International"] "args": ["UTF-8"]
}, },
], ],
}, },
{ {
name: "To EBCDIC: hello", name: "Encode text (EBCDIC): hello",
input: "hello", input: "hello",
expectedOutput: "88 85 93 93 96", expectedOutput: "88 85 93 93 96",
recipeConfig: [ recipeConfig: [
{ {
"op": "To EBCDIC", "op": "Encode text",
"args": ["IBM EBCDIC International"] "args": ["IBM EBCDIC International"]
}, },
{ {
@ -53,4 +53,19 @@ TestRegister.addTests([
}, },
], ],
}, },
{
name: "Decode text (EBCDIC): 88 85 93 93 96",
input: "88 85 93 93 96",
expectedOutput: "hello",
recipeConfig: [
{
"op": "From Hex",
"args": ["Space"]
},
{
"op": "Decode text",
"args": ["IBM EBCDIC International"]
},
],
},
]); ]);