diff --git a/src/core/FlowControl.js b/src/core/FlowControl.js
index 4c1e6a61..c4f717a8 100755
--- a/src/core/FlowControl.js
+++ b/src/core/FlowControl.js
@@ -267,11 +267,12 @@ const FlowControl = {
runMagic: async function(state) {
const ings = state.opList[state.progress].getIngValues(),
depth = ings[0],
- extLang = ings[1],
+ intensive = ings[1],
+ extLang = ings[2],
dish = state.dish,
currentRecipeConfig = state.opList.map(op => op.getConfig()),
magic = new Magic(dish.get(Dish.ARRAY_BUFFER)),
- options = await magic.speculativeExecution(depth, extLang);
+ options = await magic.speculativeExecution(depth, extLang, intensive);
let output = `
";
+
+ if (!options.length) {
+ output = "Nothing of interest could be detected about the input data.\nHave you tried modifying the operation arguments?";
+ }
dish.set(output, Dish.HTML);
return state;
},
diff --git a/src/core/config/OperationConfig.js b/src/core/config/OperationConfig.js
index 17bc8f4c..8579e93b 100755
--- a/src/core/config/OperationConfig.js
+++ b/src/core/config/OperationConfig.js
@@ -83,7 +83,7 @@ import URL_ from "../operations/URL.js";
const OperationConfig = {
"Magic": {
module: "Default",
- description: "Attempts to detect what the input data is and which operations could help to make more sense of it.",
+ description: "The Magic operation attempts to detect various properties of the input data and suggests which operations could help to make more sense of it.
Options
Depth: If an operation appears to match the data, it will be run and the result will be analysed further. This argument controls the maximum number of levels of recursion.
Intensive mode: When this is turned on, various encodings like XOR and bit rotates are brute-forced to attempt to detect valid data underneath. To improve performance, only the first 100 bytes of the data is brute-forced.
Extensive language support: At each stage, the relative byte frequencies of the data will be compared to average frequencies for a number of languages. The default set consists of ~40 of the most commonly used languages on the Internet. The extensive list consists of 284 languages and can result in many languages matching the data if their byte frequencies are similar.",
inputType: "ArrayBuffer",
outputType: "html",
flowControl: true,
@@ -93,6 +93,11 @@ const OperationConfig = {
type: "number",
value: 3
},
+ {
+ name: "Intensive mode",
+ type: "boolean",
+ value: false
+ },
{
name: "Extensive language support",
type: "boolean",
@@ -1146,7 +1151,7 @@ const OperationConfig = {
args: [],
patterns: [
{
- match: "%[\\da-f]{2}",
+ match: ".*(?:%[\\da-f]{2}.*){4}",
flags: "i",
args: []
},
@@ -1210,7 +1215,7 @@ const OperationConfig = {
args: [],
patterns: [
{
- match: "(?:=[\\da-f]{2}|=\\n)(?:[\\x21-\\x3d\\x3f-\\x7e \\t]|=[\\da-f]{2}|=\\n)*$",
+ match: "^[\\x21-\\x3d\\x3f-\\x7e \\t]*(?:=[\\da-f]{2}|=\\r?\\n)(?:[\\x21-\\x3d\\x3f-\\x7e \\t]|=[\\da-f]{2}|=\\r?\\n)*$",
flags: "i",
args: []
},
diff --git a/src/core/lib/Magic.js b/src/core/lib/Magic.js
index b604ef71..ff28bb2b 100644
--- a/src/core/lib/Magic.js
+++ b/src/core/lib/Magic.js
@@ -167,20 +167,58 @@ class Magic {
return true;
}
+    /**
+     * Generate various simple brute-forced encodings of the data (truncated to 100 bytes).
+     * Covers every non-zero single-byte XOR key and every 8-bit right rotation of 1 to 7 bits.
+     *
+     * @returns {Object[]} - The encoded data (ArrayBuffer) and an operation config to generate it.
+     */
+    bruteForce() {
+        const sample = new Uint8Array(this.inputBuffer).slice(0, 100);
+
+        const results = [];
+
+        // 1-byte XOR: key 0 is skipped because it would leave the sample unchanged
+        for (let i = 1; i < 256; i++) {
+            results.push({
+                data: sample.map(b => b ^ i).buffer,
+                conf: {
+                    op: "XOR",
+                    args: [{"option": "Hex", "string": i.toString(16)}, "Standard", false]
+                }
+            });
+        }
+
+        // Bit rotate right by 1-7 bits: the low i bits of each byte wrap round to the top
+        for (let i = 1; i < 8; i++) {
+            results.push({
+                data: sample.map(b => (b >> i) | ((b & (Math.pow(2, i) - 1)) << (8 - i))).buffer,
+                conf: {
+                    op: "Rotate right",
+                    args: [i, false]
+                }
+            });
+        }
+
+        return results;
+    }
+
/**
* Speculatively executes matching operations, recording metadata of each result.
*
* @param {number} [depth=0] - How many levels to try to execute
* @param {boolean} [extLang=false] - Extensive language support (false = only check the most
* common Internet languages)
+ * @param {boolean} [intensive=false] - Run brute-forcing on each branch (significantly affects
+ * performance)
* @param {Object[]} [recipeConfig=[]] - The recipe configuration up to this point
* @returns {Object[]} - A sorted list of the recipes most likely to result in correct decoding
*/
- async speculativeExecution(depth = 0, extLang = false, recipeConfig = []) {
+ async speculativeExecution(depth = 0, extLang = false, intensive = false, recipeConfig = []) {
if (depth < 0) return [];
// Find any operations that can be run on this data
- const matchingOps = this.findMatchingOps();
+ let matchingOps = this.findMatchingOps();
let results = [];
@@ -194,8 +232,8 @@ class Magic {
matchingOps: matchingOps
});
- // Execute each of those operations, then recursively call the speculativeExecution() method
- // on the resulting data, recording the properties of each option.
+ // Execute each of the matching operations, then recursively call the speculativeExecution()
+ // method on the resulting data, recording the properties of each option.
await Promise.all(matchingOps.map(async op => {
const dish = new Dish(this.inputBuffer, Dish.ARRAY_BUFFER),
opConfig = {
@@ -209,11 +247,32 @@ class Magic {
await recipe.execute(dish, 0);
const magic = new Magic(dish.get(Dish.ARRAY_BUFFER), this.opPatterns),
- speculativeResults = await magic.speculativeExecution(depth-1, [...recipeConfig, opConfig]);
+ speculativeResults = await magic.speculativeExecution(
+ depth-1, extLang, intensive, [...recipeConfig, opConfig]);
results = results.concat(speculativeResults);
}));
+ if (intensive) {
+ // Run brute forcing of various types on the data and create a new branch for each option
+ const bfEncodings = this.bruteForce();
+
+ await Promise.all(bfEncodings.map(async enc => {
+ const magic = new Magic(enc.data, this.opPatterns),
+ bfResults = await magic.speculativeExecution(
+ depth-1, extLang, false, [...recipeConfig, enc.conf]);
+
+ results = results.concat(bfResults);
+ }));
+ }
+
+ // Prune branches that do not match anything
+ results = results.filter(r =>
+ r.languageScores[0].probability > 0 ||
+ r.fileType ||
+ r.isUTF8 ||
+ r.matchingOps.length);
+
// Return a sorted list of possible recipes along with their properties
return results.sort((a, b) => {
// Each option is sorted based on its most likely language (lower is better)
diff --git a/src/core/operations/FileType.js b/src/core/operations/FileType.js
index d6ebb7c8..b9d399cd 100755
--- a/src/core/operations/FileType.js
+++ b/src/core/operations/FileType.js
@@ -472,16 +472,16 @@ const FileType = {
// Must be before Little-endian UTF-16 BOM
if (buf[0] === 0xFF && buf[1] === 0xFE && buf[2] === 0x00 && buf[3] === 0x00) {
return {
- ext: "",
- mime: "",
+ ext: "UTF32LE",
+ mime: "charset/utf32le",
desc: "Little-endian UTF-32 encoded Unicode byte order mark detected."
};
}
if (buf[0] === 0xFF && buf[1] === 0xFE) {
return {
- ext: "",
- mime: "",
+ ext: "UTF16LE",
+ mime: "charset/utf16le",
desc: "Little-endian UTF-16 encoded Unicode byte order mark detected."
};
}