diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index 2d194c37..102d76aa 100755 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -395,6 +395,7 @@ "ops": [ "Entropy", "Frequency distribution", + "Index of Coincidence", "Chi Square", "Disassemble x86", "Pseudo-Random Number Generator", diff --git a/src/core/operations/IndexOfCoincidence.mjs b/src/core/operations/IndexOfCoincidence.mjs new file mode 100644 index 00000000..8edf5e33 --- /dev/null +++ b/src/core/operations/IndexOfCoincidence.mjs @@ -0,0 +1,107 @@ +/** + * @author George O [georgeomnet+cyberchef@gmail.com] + * @copyright Crown Copyright 2019 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import Utils from "../Utils"; + +/** + * Index of Coincidence operation + */ +class IndexOfCoincidence extends Operation { + + /** + * IndexOfCoincidence constructor + */ + constructor() { + super(); + + this.name = "Index of Coincidence"; + this.module = "Default"; + this.description = "Index of Coincidence (IC) is the probability of two randomly selected characters being the same. This can be used to determine whether text is readable or random, with English text having an IC of around 0.066. IC can therefore be a sound method to automate frequency analysis."; + this.infoURL = "https://wikipedia.org/wiki/Index_of_coincidence"; + this.inputType = "string"; + this.outputType = "number"; + this.presentType = "html"; + this.args = []; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {number} + */ + run(input, args) { + const text = input.toLowerCase().replace(/[^a-z]/g, ""), + frequencies = new Array(26).fill(0), + alphabet = Utils.expandAlphRange("a-z"); + let coincidence = 0.00, + density = 0.00, + result = 0.00, + i; + + for (i=0; i < alphabet.length; i++) { + frequencies[i] = text.count(alphabet[i]); + } + + for (i=0; i < frequencies.length; i++) { + coincidence += frequencies[i] * (frequencies[i] - 1); + } + + density = frequencies.sum(); + + // Ensure that we don't divide by 0 + if (density < 2) density = 2; + + result = coincidence / (density * (density - 1)); + + return result; + } + + /** + * Displays the IC as a scale bar for web apps. + * + * @param {number} ic + * @returns {html} + */ + present(ic) { + return `Index of Coincidence: ${ic} +Normalized: ${ic * 26} +

+- 0 represents complete randomness (all characters are unique), whereas 1 represents no randomness (all characters are identical). +- English text generally has an IC of between 0.67 to 0.78. +- 'Random' text is determined by the probability that each letter occurs the same number of times as another. + +The graph shows the IC of the input data. A low IC generally means that the text is random, encoded or encrypted. + + + `; + } + +} + +export default IndexOfCoincidence; diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs index 8129d4eb..229ef8b9 100644 --- a/tests/operations/index.mjs +++ b/tests/operations/index.mjs @@ -49,6 +49,7 @@ import "./tests/Hash"; import "./tests/HaversineDistance"; import "./tests/Hexdump"; import "./tests/Image"; +import "./tests/IndexOfCoincidence"; import "./tests/Jump"; import "./tests/JSONBeautify"; import "./tests/JSONMinify"; diff --git a/tests/operations/tests/IndexOfCoincidence.mjs b/tests/operations/tests/IndexOfCoincidence.mjs new file mode 100644 index 00000000..3dc4cd35 --- /dev/null +++ b/tests/operations/tests/IndexOfCoincidence.mjs @@ -0,0 +1,22 @@ +/** + * Index of Coincidence tests. + * + * @author George O [georgeomnet+cyberchef@gmail.com] + * @copyright Crown Copyright 2019 + * @license Apache-2.0 + */ +import TestRegister from "../TestRegister"; + +TestRegister.addTests([ + { + name: "Index of Coincidence", + input: "Hello world, this is a test to determine the correct IC value.", + expectedMatch: /^Index of Coincidence: 0\.07142857142857142\nNormalized: 1\.857142857142857/, + recipeConfig: [ + { + "op": "Index of Coincidence", + "args": [] + }, + ], + }, +]);