mirror of
https://github.com/gchq/CyberChef.git
synced 2024-11-16 17:08:31 +01:00
Merge branch 'tlwr-feature-ebcdic'
This commit is contained in:
commit
7b18a2f46f
6 changed files with 2695 additions and 32 deletions
|
@ -61,7 +61,8 @@ const Categories = [
|
|||
"Hex to PEM",
|
||||
"Parse ASN.1 hex string",
|
||||
"Change IP format",
|
||||
"Text encoding",
|
||||
"Encode text",
|
||||
"Decode text",
|
||||
"Swap endianness",
|
||||
]
|
||||
},
|
||||
|
@ -143,7 +144,8 @@ const Categories = [
|
|||
{
|
||||
name: "Language",
|
||||
ops: [
|
||||
"Text encoding",
|
||||
"Encode text",
|
||||
"Decode text",
|
||||
"Unescape Unicode Characters",
|
||||
]
|
||||
},
|
||||
|
|
|
@ -887,21 +887,43 @@ const OperationConfig = {
|
|||
}
|
||||
]
|
||||
},
|
||||
"Text encoding": {
|
||||
description: "Translates the data between different character encodings.<br><br>Supported charsets are:<ul><li>UTF8</li><li>UTF16</li><li>UTF16LE (little-endian)</li><li>UTF16BE (big-endian)</li><li>Hex</li><li>Base64</li><li>Latin1 (ISO-8859-1)</li><li>Windows-1251</li></ul>",
|
||||
run: CharEnc.run,
|
||||
"Encode text": {
|
||||
description: [
|
||||
"Encodes text into the chosen character encoding.",
|
||||
"<br><br>",
|
||||
"Supported charsets are:",
|
||||
"<ul>",
|
||||
Object.keys(CharEnc.IO_FORMAT).map(e => `<li>${e}</li>`).join("\n"),
|
||||
"</ul>",
|
||||
].join("\n"),
|
||||
run: CharEnc.runEncode,
|
||||
inputType: "string",
|
||||
outputType: "byteArray",
|
||||
args: [
|
||||
{
|
||||
name: "Encoding",
|
||||
type: "option",
|
||||
value: Object.keys(CharEnc.IO_FORMAT),
|
||||
},
|
||||
]
|
||||
},
|
||||
"Decode text": {
|
||||
description: [
|
||||
"Decodes text from the chosen character encoding.",
|
||||
"<br><br>",
|
||||
"Supported charsets are:",
|
||||
"<ul>",
|
||||
Object.keys(CharEnc.IO_FORMAT).map(e => `<li>${e}</li>`).join("\n"),
|
||||
"</ul>",
|
||||
].join("\n"),
|
||||
run: CharEnc.runDecode,
|
||||
inputType: "byteArray",
|
||||
outputType: "string",
|
||||
args: [
|
||||
{
|
||||
name: "Input type",
|
||||
name: "Encoding",
|
||||
type: "option",
|
||||
value: CharEnc.IO_FORMAT
|
||||
},
|
||||
{
|
||||
name: "Output type",
|
||||
type: "option",
|
||||
value: CharEnc.IO_FORMAT
|
||||
value: Object.keys(CharEnc.IO_FORMAT),
|
||||
},
|
||||
]
|
||||
},
|
||||
|
|
2518
src/core/lib/codepage.js
Normal file
2518
src/core/lib/codepage.js
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,3 +1,4 @@
|
|||
import cptable from "../lib/codepage.js";
|
||||
import Utils from "../Utils.js";
|
||||
import CryptoJS from "crypto-js";
|
||||
|
||||
|
@ -17,34 +18,82 @@ const CharEnc = {
|
|||
* @constant
|
||||
* @default
|
||||
*/
|
||||
IO_FORMAT: ["UTF8", "UTF16", "UTF16LE", "UTF16BE", "Latin1", "Windows-1251", "Hex", "Base64"],
|
||||
IO_FORMAT: {
|
||||
"UTF-8 (65001)": 65001,
|
||||
"UTF-7 (65000)": 65000,
|
||||
"UTF16LE (1200)": 1200,
|
||||
"UTF16BE (1201)": 1201,
|
||||
"UTF16 (1201)": 1201,
|
||||
"IBM EBCDIC International (500)": 500,
|
||||
"IBM EBCDIC US-Canada (37)": 37,
|
||||
"Windows-874 Thai (874)": 874,
|
||||
"Japanese Shift-JIS (932)": 932,
|
||||
"Simplified Chinese GBK (936)": 936,
|
||||
"Korean (949)": 949,
|
||||
"Traditional Chinese Big5 (950)": 950,
|
||||
"Windows-1250 Central European (1250)": 1250,
|
||||
"Windows-1251 Cyrillic (1251)": 1251,
|
||||
"Windows-1252 Latin (1252)": 1252,
|
||||
"Windows-1253 Greek (1253)": 1253,
|
||||
"Windows-1254 Turkish (1254)": 1254,
|
||||
"Windows-1255 Hebrew (1255)": 1255,
|
||||
"Windows-1256 Arabic (1256)": 1256,
|
||||
"Windows-1257 Baltic (1257)": 1257,
|
||||
"Windows-1258 Vietnam (1258)": 1258,
|
||||
"US-ASCII (20127)": 20127,
|
||||
"Simplified Chinese GB2312 (20936)": 20936,
|
||||
"KOI8-R Russian Cyrillic (20866)": 20866,
|
||||
"KOI8-U Ukrainian Cyrillic (21866)": 21866,
|
||||
"ISO-8859-1 Latin 1 Western European (28591)": 28591,
|
||||
"ISO-8859-2 Latin 2 Central European (28592)": 28592,
|
||||
"ISO-8859-3 Latin 3 South European (28593)": 28593,
|
||||
"ISO-8859-4 Latin 4 North European (28594)": 28594,
|
||||
"ISO-8859-5 Latin/Cyrillic (28595)": 28595,
|
||||
"ISO-8859-6 Latin/Arabic (28596)": 28596,
|
||||
"ISO-8859-7 Latin/Greek (28597)": 28597,
|
||||
"ISO-8859-8 Latin/Hebrew (28598)": 28598,
|
||||
"ISO-8859-9 Latin 5 Turkish (28599)": 28599,
|
||||
"ISO-8859-10 Latin 6 Nordic (28600)": 28600,
|
||||
"ISO-8859-11 Latin/Thai (28601)": 28601,
|
||||
"ISO-8859-13 Latin 7 Baltic Rim (28603)": 28603,
|
||||
"ISO-8859-14 Latin 8 Celtic (28604)": 28604,
|
||||
"ISO-8859-15 Latin 9 (28605)": 28605,
|
||||
"ISO-8859-16 Latin 10 (28606)": 28606,
|
||||
"ISO-2022 JIS Japanese (50222)": 50222,
|
||||
"EUC Japanese (51932)": 51932,
|
||||
"EUC Korean (51949)": 51949,
|
||||
"Simplified Chinese GB18030 (54936)": 54936,
|
||||
},
|
||||
|
||||
/**
|
||||
* Text encoding operation.
|
||||
* Encode text operation.
|
||||
* @author tlwr [toby@toby.codes]
|
||||
*
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
* @returns {byteArray}
|
||||
*/
|
||||
run: function(input, args) {
|
||||
let inputFormat = args[0],
|
||||
outputFormat = args[1];
|
||||
|
||||
if (inputFormat === "Windows-1251") {
|
||||
input = Utils.win1251ToUnicode(input);
|
||||
input = CryptoJS.enc.Utf8.parse(input);
|
||||
} else {
|
||||
input = Utils.format[inputFormat].parse(input);
|
||||
}
|
||||
|
||||
if (outputFormat === "Windows-1251") {
|
||||
input = CryptoJS.enc.Utf8.stringify(input);
|
||||
return Utils.unicodeToWin1251(input);
|
||||
} else {
|
||||
return Utils.format[outputFormat].stringify(input);
|
||||
}
|
||||
runEncode: function(input, args) {
|
||||
const format = CharEnc.IO_FORMAT[args[0]];
|
||||
let encoded = cptable.utils.encode(format, input);
|
||||
encoded = Array.from(encoded);
|
||||
return encoded;
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* Decode text operation.
|
||||
* @author tlwr [toby@toby.codes]
|
||||
*
|
||||
* @param {byteArray} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
runDecode: function(input, args) {
|
||||
const format = CharEnc.IO_FORMAT[args[0]];
|
||||
let decoded = cptable.utils.decode(format, input);
|
||||
return decoded;
|
||||
},
|
||||
};
|
||||
|
||||
export default CharEnc;
|
||||
|
|
|
@ -13,6 +13,7 @@ import "babel-polyfill";
|
|||
import TestRegister from "./TestRegister.js";
|
||||
import "./tests/operations/Base58.js";
|
||||
import "./tests/operations/ByteRepr.js";
|
||||
import "./tests/operations/CharEnc.js";
|
||||
import "./tests/operations/Code.js";
|
||||
import "./tests/operations/Compress.js";
|
||||
import "./tests/operations/FlowControl.js";
|
||||
|
|
71
test/tests/operations/CharEnc.js
Normal file
71
test/tests/operations/CharEnc.js
Normal file
|
@ -0,0 +1,71 @@
|
|||
/**
|
||||
* CharEnc tests.
|
||||
*
|
||||
* @author tlwr [toby@toby.codes]
|
||||
* @copyright Crown Copyright 2017
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
import TestRegister from "../../TestRegister.js";
|
||||
|
||||
// Encoding option labels passed as the single argument of the
// "Encode text" / "Decode text" operations in the recipes below.
const UTF8_LABEL = "UTF-8 (65001)";
const EBCDIC_INTL_LABEL = "IBM EBCDIC International (500)";

// "hello" encoded with the EBCDIC International code page, rendered as
// space-delimited hex.
const HELLO_EBCDIC_HEX = "88 85 93 93 96";

/**
 * Builds one recipe step.
 *
 * @param {string} op - Operation name
 * @param {...*} args - Arguments for the operation
 * @returns {Object}
 */
const step = (op, ...args) => ({op, args});

/**
 * Builds a fresh encode-then-decode UTF-8 recipe for each test case, so no
 * two cases share the same array instance.
 *
 * @returns {Object[]}
 */
const utf8RoundTrip = () => [
    step("Encode text", UTF8_LABEL),
    step("Decode text", UTF8_LABEL),
];

TestRegister.addTests([
    {
        name: "Encode text, Decode text: nothing",
        input: "",
        expectedOutput: "",
        recipeConfig: utf8RoundTrip(),
    },
    {
        name: "Encode text, Decode text: hello",
        input: "hello",
        expectedOutput: "hello",
        recipeConfig: utf8RoundTrip(),
    },
    {
        name: "Encode text (EBCDIC): hello",
        input: "hello",
        expectedOutput: HELLO_EBCDIC_HEX,
        recipeConfig: [
            step("Encode text", EBCDIC_INTL_LABEL),
            step("To Hex", "Space"),
        ],
    },
    {
        name: "Decode text (EBCDIC): 88 85 93 93 96",
        input: HELLO_EBCDIC_HEX,
        expectedOutput: "hello",
        recipeConfig: [
            step("From Hex", "Space"),
            step("Decode text", EBCDIC_INTL_LABEL),
        ],
    },
]);
|
Loading…
Reference in a new issue