Added 'Intensive mode' to the Magic operation, where it brute-forces various simple encodings like XOR or bit rotates.

This commit is contained in:
n1474335 2018-02-14 16:08:59 +00:00
parent 544d78f461
commit 99ade42e9a
4 changed files with 83 additions and 14 deletions

View File

@ -267,11 +267,12 @@ const FlowControl = {
runMagic: async function(state) {
const ings = state.opList[state.progress].getIngValues(),
depth = ings[0],
extLang = ings[1],
intensive = ings[1],
extLang = ings[2],
dish = state.dish,
currentRecipeConfig = state.opList.map(op => op.getConfig()),
magic = new Magic(dish.get(Dish.ARRAY_BUFFER)),
options = await magic.speculativeExecution(depth, extLang);
options = await magic.speculativeExecution(depth, extLang, intensive);
let output = `<table
class='table table-hover table-condensed table-bordered'
@ -319,6 +320,10 @@ const FlowControl = {
});
output += "</table>";
if (!options.length) {
output = "Nothing of interest could be detected about the input data.\nHave you tried modifying the operation arguments?";
}
dish.set(output, Dish.HTML);
return state;
},

View File

@ -83,7 +83,7 @@ import URL_ from "../operations/URL.js";
const OperationConfig = {
"Magic": {
module: "Default",
description: "Attempts to detect what the input data is and which operations could help to make more sense of it.",
description: "The Magic operation attempts to detect various properties of the input data and suggests which operations could help to make more sense of it.<br><br><b>Options</b><br><u>Depth:</u> If an operation appears to match the data, it will be run and the result will be analysed further. This argument controls the maximum number of levels of recursion.<br><br><u>Intensive mode:</u> When this is turned on, various encodings like XOR and bit rotates are brute-forced to attempt to detect valid data underneath. To improve performance, only the first 100 bytes of the data is brute-forced.<br><br><u>Extensive language support:</u> At each stage, the relative byte frequencies of the data will be compared to average frequencies for a number of languages. The default set consists of ~40 of the most commonly used languages on the Internet. The extensive list consists of 284 languages and can result in many languages matching the data if their byte frequencies are similar.",
inputType: "ArrayBuffer",
outputType: "html",
flowControl: true,
@ -93,6 +93,11 @@ const OperationConfig = {
type: "number",
value: 3
},
{
name: "Intensive mode",
type: "boolean",
value: false
},
{
name: "Extensive language support",
type: "boolean",
@ -1146,7 +1151,7 @@ const OperationConfig = {
args: [],
patterns: [
{
match: "%[\\da-f]{2}",
match: ".*(?:%[\\da-f]{2}.*){4}",
flags: "i",
args: []
},
@ -1210,7 +1215,7 @@ const OperationConfig = {
args: [],
patterns: [
{
match: "(?:=[\\da-f]{2}|=\\n)(?:[\\x21-\\x3d\\x3f-\\x7e \\t]|=[\\da-f]{2}|=\\n)*$",
match: "^[\\x21-\\x3d\\x3f-\\x7e \\t]*(?:=[\\da-f]{2}|=\\r?\\n)(?:[\\x21-\\x3d\\x3f-\\x7e \\t]|=[\\da-f]{2}|=\\r?\\n)*$",
flags: "i",
args: []
},

View File

@ -167,20 +167,58 @@ class Magic {
return true;
}
/**
* Generate various simple brute-forced encodings of the data (trucated to 100 bytes).
*
* @returns {Object[]} - The encoded data and an operation config to generate it.
*/
bruteForce() {
const sample = new Uint8Array(this.inputBuffer).slice(0, 100);
let results = [];
// 1-byte XOR
for (let i = 1; i < 256; i++) {
results.push({
data: sample.map(b => b ^ i).buffer,
conf: {
op: "XOR",
args: [{"option": "Hex", "string": i.toString(16)}, "Standard", false]
}
});
}
// Bit rotate
for (let i = 1; i < 8; i++) {
results.push({
data: sample.map(b => (b >> i) | ((b & (Math.pow(2, i) - 1)) << (8 - i))).buffer,
conf: {
op: "Rotate right",
args: [i, false]
}
});
}
return results;
}
/**
* Speculatively executes matching operations, recording metadata of each result.
*
* @param {number} [depth=0] - How many levels to try to execute
* @param {boolean} [extLang=false] - Extensive language support (false = only check the most
* common Internet languages)
* @param {boolean} [intensive=false] - Run brute-forcing on each branch (significantly affects
* performance)
* @param {Object[]} [recipeConfig=[]] - The recipe configuration up to this point
* @returns {Object[]} - A sorted list of the recipes most likely to result in correct decoding
*/
async speculativeExecution(depth = 0, extLang = false, recipeConfig = []) {
async speculativeExecution(depth = 0, extLang = false, intensive = false, recipeConfig = []) {
if (depth < 0) return [];
// Find any operations that can be run on this data
const matchingOps = this.findMatchingOps();
let matchingOps = this.findMatchingOps();
let results = [];
@ -194,8 +232,8 @@ class Magic {
matchingOps: matchingOps
});
// Execute each of those operations, then recursively call the speculativeExecution() method
// on the resulting data, recording the properties of each option.
// Execute each of the matching operations, then recursively call the speculativeExecution()
// method on the resulting data, recording the properties of each option.
await Promise.all(matchingOps.map(async op => {
const dish = new Dish(this.inputBuffer, Dish.ARRAY_BUFFER),
opConfig = {
@ -209,11 +247,32 @@ class Magic {
await recipe.execute(dish, 0);
const magic = new Magic(dish.get(Dish.ARRAY_BUFFER), this.opPatterns),
speculativeResults = await magic.speculativeExecution(depth-1, [...recipeConfig, opConfig]);
speculativeResults = await magic.speculativeExecution(
depth-1, extLang, intensive, [...recipeConfig, opConfig]);
results = results.concat(speculativeResults);
}));
if (intensive) {
// Run brute forcing of various types on the data and create a new branch for each option
const bfEncodings = this.bruteForce();
await Promise.all(bfEncodings.map(async enc => {
const magic = new Magic(enc.data, this.opPatterns),
bfResults = await magic.speculativeExecution(
depth-1, extLang, false, [...recipeConfig, enc.conf]);
results = results.concat(bfResults);
}));
}
// Prune branches that do not match anything
results = results.filter(r =>
r.languageScores[0].probability > 0 ||
r.fileType ||
r.isUTF8 ||
r.matchingOps.length);
// Return a sorted list of possible recipes along with their properties
return results.sort((a, b) => {
// Each option is sorted based on its most likely language (lower is better)

View File

@ -472,16 +472,16 @@ const FileType = {
// Must be before Little-endian UTF-16 BOM
if (buf[0] === 0xFF && buf[1] === 0xFE && buf[2] === 0x00 && buf[3] === 0x00) {
return {
ext: "",
mime: "",
ext: "UTF32LE",
mime: "charset/utf32le",
desc: "Little-endian UTF-32 encoded Unicode byte order mark detected."
};
}
if (buf[0] === 0xFF && buf[1] === 0xFE) {
return {
ext: "",
mime: "",
ext: "UTF16LE",
mime: "charset/utf16le",
desc: "Little-endian UTF-16 encoded Unicode byte order mark detected."
};
}