From 13a54ec3186ece17401fe144ae7a56b9ffc6bf63 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 13 Aug 2020 12:36:02 +0100 Subject: [PATCH 1/4] Add unicode text format operation --- package-lock.json | 14 ++++- src/core/config/Categories.json | 1 + src/core/operations/UnicodeTextFormat.mjs | 68 +++++++++++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 src/core/operations/UnicodeTextFormat.mjs diff --git a/package-lock.json b/package-lock.json index 61c05f2c..5ede14d1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8750,7 +8750,8 @@ "version": "2.1.1", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==", - "dev": true + "dev": true, + "optional": true }, "p-limit": { "version": "2.3.0", @@ -13306,6 +13307,7 @@ "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", "dev": true, + "optional": true, "requires": { "arr-flatten": "^1.1.0", "array-unique": "^0.3.2", @@ -13324,6 +13326,7 @@ "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", "dev": true, + "optional": true, "requires": { "is-extendable": "^0.1.0" } @@ -13356,6 +13359,7 @@ "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", "dev": true, + "optional": true, "requires": { "extend-shallow": "^2.0.1", "is-number": "^3.0.0", @@ -13368,6 +13372,7 @@ "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", "dev": true, + "optional": true, "requires": { "is-extendable": "^0.1.0" } @@ -13422,13 +13427,15 @@ "version": "1.1.6", "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==", - "dev": true + "dev": true, + "optional": true }, "is-number": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", "dev": true, + "optional": true, "requires": { "kind-of": "^3.0.2" }, @@ -13438,6 +13445,7 @@ "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", "dev": true, + "optional": true, "requires": { "is-buffer": "^1.1.5" } @@ -13449,6 +13457,7 @@ "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", "dev": true, + "optional": true, "requires": { "arr-diff": "^4.0.0", "array-unique": "^0.3.2", @@ -13482,6 +13491,7 @@ "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-2.1.1.tgz", "integrity": "sha1-fIDBe53+vlmeJzZ+DU3VWQFB2zg=", "dev": true, + "optional": true, "requires": { "is-number": "^3.0.0", "repeat-string": "^1.6.1" diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index 77e3d319..ba0b83f4 100755 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -200,6 +200,7 @@ "ops": [ "Encode text", "Decode text", + "Unicode Text Format", "Remove Diacritics", "Unescape Unicode Characters", "Convert to NATO alphabet" diff --git a/src/core/operations/UnicodeTextFormat.mjs b/src/core/operations/UnicodeTextFormat.mjs new file mode 100644 index 00000000..44d17afe --- /dev/null +++ b/src/core/operations/UnicodeTextFormat.mjs @@ -0,0 +1,68 @@ +/** + * @author Matt C [me@mitt.dev] + * @copyright Crown Copyright 2020 + * @license Apache-2.0 + */ + +import Operation from "../Operation.mjs"; +import Utils from "../Utils.mjs"; + +/** + * Unicode Text Format operation + */ +class UnicodeTextFormat extends Operation { + + /** + * UnicodeTextFormat constructor + */ + constructor() { + super(); + + this.name = "Unicode Text Format"; + this.module = "Default"; + this.description = "Adds Unicode combining characters to change formatting of plaintext."; + this.infoURL = "https://en.wikipedia.org/wiki/Combining_character"; + this.inputType = "byteArray"; + this.outputType = "byteArray"; + this.args = [ + { + name: "Underline", + type: "boolean", + value: "false" + }, + { + name: "Strikethrough", + type: "boolean", + value: "false" + } + ]; + } + + /** + * @param {byteArray} input + * @param {Object[]} args + * @returns {byteArray} + */ + run(input, args) { + const [underline, strikethrough] = args; + let output = input.map(char => [char]); + console.dir(output); + if (strikethrough) { + output = output.map(charFormat => { + charFormat.push(...Utils.strToUtf8ByteArray("\u0336")); + return charFormat; + }); + } + if (underline) { + output = output.map(charFormat => { + charFormat.push(...Utils.strToUtf8ByteArray("\u0332")); + return charFormat; + }); + } + console.dir(output); + return output.flat(); + } + +} + +export default UnicodeTextFormat; From 3ab95384df46b4a8a4ec7138a2f9ad72540a4a85 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 19 Aug 2020 10:55:29 +0100 Subject: [PATCH 2/4] Add unicode tests --- src/core/operations/RemoveDiacritics.mjs | 2 +- src/core/operations/UnicodeTextFormat.mjs | 5 +- tests/operations/index.mjs | 2 +- tests/operations/tests/RemoveDiacritics.mjs | 22 ------ tests/operations/tests/Unicode.mjs | 83 +++++++++++++++++++++ 5 files changed, 87 insertions(+), 27 deletions(-) delete mode 100644 tests/operations/tests/RemoveDiacritics.mjs create mode 100644 tests/operations/tests/Unicode.mjs diff --git a/src/core/operations/RemoveDiacritics.mjs b/src/core/operations/RemoveDiacritics.mjs index dd814375..859d86d7 100644 --- a/src/core/operations/RemoveDiacritics.mjs +++ b/src/core/operations/RemoveDiacritics.mjs @@ -19,7 +19,7 @@ class RemoveDiacritics extends Operation { this.name = "Remove Diacritics"; this.module = "Default"; - this.description = "Replaces accented characters with their latin character equivalent."; + this.description = "Replaces accented characters with their latin character equivalent. Accented characters are made up of Unicode combining characters, so unicode text formatting such as strikethroughs and underlines will also be removed."; this.infoURL = "https://wikipedia.org/wiki/Diacritic"; this.inputType = "string"; this.outputType = "string"; diff --git a/src/core/operations/UnicodeTextFormat.mjs b/src/core/operations/UnicodeTextFormat.mjs index 44d17afe..b1fc474b 100644 --- a/src/core/operations/UnicodeTextFormat.mjs +++ b/src/core/operations/UnicodeTextFormat.mjs @@ -46,7 +46,6 @@ class UnicodeTextFormat extends Operation { run(input, args) { const [underline, strikethrough] = args; let output = input.map(char => [char]); - console.dir(output); if (strikethrough) { output = output.map(charFormat => { charFormat.push(...Utils.strToUtf8ByteArray("\u0336")); @@ -59,8 +58,8 @@ class UnicodeTextFormat extends Operation { return charFormat; }); } - console.dir(output); - return output.flat(); + // return output.flat(); - Not supported in Node 10, polyfilled + return [].concat(...output); } } diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs index 8d3cd623..07991256 100644 --- a/tests/operations/index.mjs +++ b/tests/operations/index.mjs @@ -69,7 +69,6 @@ import "./tests/ParseQRCode.mjs"; import "./tests/PowerSet.mjs"; import "./tests/Regex.mjs"; import "./tests/Register.mjs"; -import "./tests/RemoveDiacritics.mjs"; import "./tests/Rotate.mjs"; import "./tests/SeqUtils.mjs"; import "./tests/SetDifference.mjs"; @@ -101,6 +100,7 @@ import "./tests/LuhnChecksum.mjs"; import "./tests/CipherSaber2.mjs"; import "./tests/Colossus.mjs"; import "./tests/ParseObjectIDTimestamp.mjs"; +import "./tests/Unicode.mjs"; // Cannot test operations that use the File type yet diff --git a/tests/operations/tests/RemoveDiacritics.mjs b/tests/operations/tests/RemoveDiacritics.mjs deleted file mode 100644 index c58a2ba6..00000000 --- a/tests/operations/tests/RemoveDiacritics.mjs +++ /dev/null @@ -1,22 +0,0 @@ -/** - * Remove Diacritics tests. - * - * @author Klaxon [klaxon@veyr.com] - * @copyright Crown Copyright 2017 - * @license Apache-2.0 - */ -import TestRegister from "../../lib/TestRegister.mjs"; - -TestRegister.addTests([ - { - name: "Remove Diacritics", - input: "\xe0, \xe8, \xec, \xf2, \xf9 \xc0, \xc8, \xcc, \xd2, \xd9\n\xe1, \xe9, \xed, \xf3, \xfa, \xfd \xc1, \xc9, \xcd, \xd3, \xda, \xdd\n\xe2, \xea, \xee, \xf4, \xfb \xc2, \xca, \xce, \xd4, \xdb\n\xe3, \xf1, \xf5 \xc3, \xd1, \xd5\n\xe4, \xeb, \xef, \xf6, \xfc, \xff \xc4, \xcb, \xcf, \xd6, \xdc, \u0178\n\xe5, \xc5", - expectedOutput: "a, e, i, o, u A, E, I, O, U\na, e, i, o, u, y A, E, I, O, U, Y\na, e, i, o, u A, E, I, O, U\na, n, o A, N, O\na, e, i, o, u, y A, E, I, O, U, Y\na, A", - recipeConfig: [ - { - "op": "Remove Diacritics", - "args": [] - }, - ], - }, -]); diff --git a/tests/operations/tests/Unicode.mjs b/tests/operations/tests/Unicode.mjs new file mode 100644 index 00000000..2603768f --- /dev/null +++ b/tests/operations/tests/Unicode.mjs @@ -0,0 +1,83 @@ +/** + * Unicode operation tests. + * + * @author Matt C [me@mitt.dev] + * @author Klaxon [klaxon@veyr.com] + * + * @copyright Crown Copyright 2020 + * @license Apache-2.0 + */ +import TestRegister from "../../lib/TestRegister.mjs"; + +TestRegister.addTests([ + { + name: "Unicode Text Format: underline", + input: "a", + expectedOutput: "a\u0332", + recipeConfig: [ + { + "op": "Unicode Text Format", + "args": [true, false], + } + ], + }, + { + name: "Unicode Text Format: strikethrough", + input: "a", + expectedOutput: "a\u0336", + recipeConfig: [ + { + "op": "Unicode Text Format", + "args": [false, true], + } + ], + }, + { + name: "Unicode Text Format: both", + input: "a", + expectedOutput: "a\u0336\u0332", + recipeConfig: [ + { + "op": "Unicode Text Format", + "args": [true, true], + } + ], + }, + { + name: "Remove Diacritics: text formatting", + input: "a", + expectedOutput: "a", + recipeConfig: [ + { + "op": "Unicode Text Format", + "args": [true, true], + }, + { + "op": "Remove Diacritics", + "args": [] + } + ], + }, + { + name: "Remove Diacritics: all diacritical marks one char", + input: "à̴̵̶̷̸̡̢̧̨̛̖̗̘̙̜̝̞̟̠̣̤̥̦̩̪̫̬̭̮̯̰̱̲̳̹̺̻̼́̂̃̄̅̆̇̈̉̊̋̌̍̎̏̐̑̒̓̔̽̾̿̀́͂̓̈́̕̚͠͡ͅ", // sorry about this line lol + expectedOutput: "a", + recipeConfig: [ + { + "op": "Remove Diacritics", + "args": [] + } + ], + }, + { + name: "Remove Diacritics: default", + input: "\xe0, \xe8, \xec, \xf2, \xf9 \xc0, \xc8, \xcc, \xd2, \xd9\n\xe1, \xe9, \xed, \xf3, \xfa, \xfd \xc1, \xc9, \xcd, \xd3, \xda, \xdd\n\xe2, \xea, \xee, \xf4, \xfb \xc2, \xca, \xce, \xd4, \xdb\n\xe3, \xf1, \xf5 \xc3, \xd1, \xd5\n\xe4, \xeb, \xef, \xf6, \xfc, \xff \xc4, \xcb, \xcf, \xd6, \xdc, \u0178\n\xe5, \xc5", + expectedOutput: "a, e, i, o, u A, E, I, O, U\na, e, i, o, u, y A, E, I, O, U, Y\na, e, i, o, u A, E, I, O, U\na, n, o A, N, O\na, e, i, o, u, y A, E, I, O, U, Y\na, A", + recipeConfig: [ + { + "op": "Remove Diacritics", + "args": [] + }, + ], + }, +]); From 5947ed21fcb5878b90107c16b681b4bef80b3370 Mon Sep 17 00:00:00 2001 From: n1474335 Date: Mon, 1 Feb 2021 15:51:14 +0000 Subject: [PATCH 4/4] Tidied up localisation in Wikipedia URL --- src/core/operations/UnicodeTextFormat.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/operations/UnicodeTextFormat.mjs b/src/core/operations/UnicodeTextFormat.mjs index b1fc474b..2c18848f 100644 --- a/src/core/operations/UnicodeTextFormat.mjs +++ b/src/core/operations/UnicodeTextFormat.mjs @@ -21,7 +21,7 @@ class UnicodeTextFormat extends Operation { this.name = "Unicode Text Format"; this.module = "Default"; this.description = "Adds Unicode combining characters to change formatting of plaintext."; - this.infoURL = "https://en.wikipedia.org/wiki/Combining_character"; + this.infoURL = "https://wikipedia.org/wiki/Combining_character"; this.inputType = "byteArray"; this.outputType = "byteArray"; this.args = [