// CyberChef/src/core/operations/Extract.js
// (repository viewer listing: 300 lines, 8.1 KiB, JavaScript)

/**
* Identifier extraction operations.
*
* @author n1474335 [n1474335@gmail.com]
* @copyright Crown Copyright 2016
* @license Apache-2.0
*
* @namespace
*/
const Extract = {

    /**
     * Runs search operations across the input data using regular expressions.
     *
     * Every non-empty match of `searchRegex` in `input` is appended to the output on its
     * own line, unless `removeRegex` is supplied and matches the matched text.
     *
     * @private
     * @param {string} input
     * @param {RegExp} searchRegex - Scanned globally; a regex without the `g` flag is copied
     *      with `g` added so that the scan still visits every match
     * @param {RegExp} removeRegex - A regular expression defining results to remove from the
     *      final list (may be null)
     * @param {boolean} includeTotal - Whether or not to include the total number of results
     * @returns {string} The matches, each followed by "\n", optionally preceded by a
     *      "Total found: N" header and a blank line
     */
    _search: function(input, searchRegex, removeRegex, includeTotal) {
        // Without the global flag exec() never moves past the first match, which would
        // make the loop below spin forever. Scan with a global copy instead.
        const regex = searchRegex.global ?
            searchRegex :
            new RegExp(searchRegex.source, searchRegex.flags + "g");
        let output = "";
        let total = 0;
        let match;

        while ((match = regex.exec(input))) {
            if (match[0].length === 0) {
                // exec() leaves lastIndex unchanged after an empty match, so step past it
                // manually to guarantee progress. Empty matches carry no text to report.
                regex.lastIndex++;
                continue;
            }

            if (removeRegex && removeRegex.test(match[0]))
                continue;

            total++;
            output += match[0] + "\n";
        }

        if (includeTotal)
            output = "Total found: " + total + "\n\n" + output;

        return output;
    },


    /**
     * Minimum string length used by runStrings when no length argument is given.
     *
     * @constant
     * @default
     */
    MIN_STRING_LEN: 3,
    /**
     * Default for the "display total" option. Not read inside this object; presumably
     * consumed by the operation configuration elsewhere.
     *
     * @constant
     * @default
     */
    DISPLAY_TOTAL: false,

    /**
     * Strings operation.
     *
     * Extracts runs of printable characters (letters, digits, space and a fixed set of
     * punctuation) that are at least the requested length.
     *
     * @param {string} input
     * @param {Object[]} args - [minimum length, display total]
     * @returns {string}
     */
    runStrings: function(input, args) {
        // A falsy length (undefined, 0, "") falls back to the default minimum.
        const minLen = args[0] || Extract.MIN_STRING_LEN;
        const displayTotal = args[1];
        const strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]";
        const regex = new RegExp(strings + "{" + minLen + ",}", "ig");

        return Extract._search(input, regex, null, displayTotal);
    },


    /**
     * Default for the "include IPv4" option. Not read inside this object; presumably
     * consumed by the operation configuration elsewhere.
     *
     * @constant
     * @default
     */
    INCLUDE_IPV4: true,
    /**
     * Default for the "include IPv6" option. Not read inside this object; presumably
     * consumed by the operation configuration elsewhere.
     *
     * @constant
     * @default
     */
    INCLUDE_IPV6: false,
    /**
     * Default for the "remove local addresses" option. Not read inside this object;
     * presumably consumed by the operation configuration elsewhere.
     *
     * @constant
     * @default
     */
    REMOVE_LOCAL: false,

    /**
     * Extract IP addresses operation.
     *
     * @param {string} input
     * @param {Object[]} args - [include IPv4, include IPv6, remove local, display total]
     * @returns {string} The matching addresses, or "" when neither family is selected
     */
    runIp: function(input, args) {
        const includeIpv4 = args[0];
        const includeIpv6 = args[1];
        const removeLocal = args[2];
        const displayTotal = args[3];
        // The IPv4 pattern uses only non-capturing groups, so the numbered backreferences
        // inside the IPv6 pattern stay valid when the two are joined with "|".
        const ipv4 = "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?";
        const ipv6 = "((?=.*::)(?!.*::.+::)(::)?([\\dA-F]{1,4}:(:|\\b)|){5}|([\\dA-F]{1,4}:){6})((([\\dA-F]{1,4}((?!\\3)::|:\\b|(?![\\dA-F])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})";
        let ips = "";

        if (includeIpv4 && includeIpv6) {
            ips = ipv4 + "|" + ipv6;
        } else if (includeIpv4) {
            ips = ipv4;
        } else if (includeIpv6) {
            ips = ipv6;
        }

        // Nothing selected, nothing to extract.
        if (!ips) {
            return "";
        }

        const regex = new RegExp(ips, "ig");
        let removeRegex = null;

        if (removeLocal) {
            // Drop results that start with 10., 192.168., 172.16-172.31. or 127.
            const ten = "10\\..+";
            const oneninetwo = "192\\.168\\..+";
            const oneseventwo = "172\\.(?:1[6-9]|2\\d|3[01])\\..+";
            const onetwoseven = "127\\..+";
            removeRegex = new RegExp("^(?:" + ten + "|" + oneninetwo +
                "|" + oneseventwo + "|" + onetwoseven + ")");
        }

        return Extract._search(input, regex, removeRegex, displayTotal);
    },


    /**
     * Extract email addresses operation.
     *
     * @param {string} input
     * @param {Object[]} args - [display total]
     * @returns {string}
     */
    runEmail: function(input, args) {
        const displayTotal = args[0];
        // The final label allows up to 63 letters (the DNS label limit) so that long
        // top-level domains such as ".online" or ".museum" are not cut short.
        const regex = /\w[-.\w]*@[-\w]+(?:\.[-\w]+)*\.[A-Z]{2,63}/ig;

        return Extract._search(input, regex, null, displayTotal);
    },


    /**
     * Extract MAC addresses operation.
     *
     * Matches six two-digit hexadecimal groups separated by ":" or "-".
     *
     * @param {string} input
     * @param {Object[]} args - [display total]
     * @returns {string}
     */
    runMac: function(input, args) {
        const displayTotal = args[0];
        const regex = /[A-F\d]{2}(?:[:-][A-F\d]{2}){5}/ig;

        return Extract._search(input, regex, null, displayTotal);
    },


    /**
     * Extract URLs operation.
     *
     * A URL here is a protocol, a dotted hostname, an optional port and an optional path.
     *
     * @param {string} input
     * @param {Object[]} args - [display total]
     * @returns {string}
     */
    runUrls: function(input, args) {
        const displayTotal = args[0];
        const protocol = "[A-Z]+://";
        const hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+";
        const port = ":\\d+";
        let path = "/[^.!,?;\"'<>()\\[\\]{}\\s\\x7F-\\xFF]*";

        // Punctuation is only accepted inside the path when more path characters follow,
        // so a trailing "." or "," is left out of the match.
        path += "(?:[.!,?]+[^.!,?;\"'<>()\\[\\]{}\\s\\x7F-\\xFF]+)*";
        const regex = new RegExp(protocol + hostname + "(?:" + port +
            ")?(?:" + path + ")?", "ig");

        return Extract._search(input, regex, null, displayTotal);
    },


    /**
     * Extract domains operation.
     *
     * Matches an optional http(s) prefix, a hostname and one of a fixed list of suffixes.
     *
     * @param {string} input
     * @param {Object[]} args - [display total]
     * @returns {string}
     */
    runDomains: function(input, args) {
        const displayTotal = args[0];
        const protocol = "https?://";
        const hostname = "[-\\w\\.]+";
        const tld = "\\.(?:com|net|org|biz|info|co|uk|onion|int|mobi|name|edu|gov|mil|eu|ac|ae|af|de|ca|ch|cn|cy|es|gb|hk|il|in|io|tv|me|nl|no|nz|ro|ru|tr|us|az|ir|kz|uz|pk)+";
        const regex = new RegExp("(?:" + protocol + ")?" + hostname + tld, "ig");

        return Extract._search(input, regex, null, displayTotal);
    },


    /**
     * Default for the "include Windows paths" option. Not read inside this object;
     * presumably consumed by the operation configuration elsewhere.
     *
     * @constant
     * @default
     */
    INCLUDE_WIN_PATH: true,
    /**
     * Default for the "include UNIX paths" option. Not read inside this object;
     * presumably consumed by the operation configuration elsewhere.
     *
     * @constant
     * @default
     */
    INCLUDE_UNIX_PATH: true,

    /**
     * Extract file paths operation.
     *
     * @param {string} input
     * @param {Object[]} args - [include Windows paths, include UNIX paths, display total]
     * @returns {string} The matching paths, or "" when neither style is selected
     */
    runFilePaths: function(input, args) {
        const includeWinPath = args[0];
        const includeUnixPath = args[1];
        const displayTotal = args[2];
        const winDrive = "[A-Z]:\\\\";
        const winName = "[A-Z\\d][A-Z\\d\\- '_\\(\\)]{0,61}";
        const winExt = "[A-Z\\d]{1,6}";
        const winPath = winDrive + "(?:" + winName + "\\\\?)*" + winName +
            "(?:\\." + winExt + ")?";
        const unixPath = "(?:/[A-Z\\d.][A-Z\\d\\-.]{0,61})+";
        let filePaths = "";

        if (includeWinPath && includeUnixPath) {
            filePaths = winPath + "|" + unixPath;
        } else if (includeWinPath) {
            filePaths = winPath;
        } else if (includeUnixPath) {
            filePaths = unixPath;
        }

        // Nothing selected, nothing to extract.
        if (!filePaths) {
            return "";
        }

        const regex = new RegExp(filePaths, "ig");
        return Extract._search(input, regex, null, displayTotal);
    },


    /**
     * Extract dates operation.
     *
     * Recognises years 1900-2099 in three layouts, with "-", " ", "/" or "." as separator.
     *
     * @param {string} input
     * @param {Object[]} args - [display total]
     * @returns {string}
     */
    runDates: function(input, args) {
        const displayTotal = args[0];
        const date1 = "(?:19|20)\\d\\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])"; // yyyy-mm-dd
        const date2 = "(?:0[1-9]|[12][0-9]|3[01])[- /.](?:0[1-9]|1[012])[- /.](?:19|20)\\d\\d"; // dd/mm/yyyy
        const date3 = "(?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])[- /.](?:19|20)\\d\\d"; // mm/dd/yyyy
        const regex = new RegExp(date1 + "|" + date2 + "|" + date3, "ig");

        return Extract._search(input, regex, null, displayTotal);
    },


    /**
     * Extract all identifiers operation.
     *
     * Runs each extractor in this object over the input and concatenates the results
     * under a heading per category. Totals are never included.
     *
     * @param {string} input
     * @param {Object[]} args - Unused
     * @returns {string}
     */
    runAllIdents: function(input, args) {
        let output = "";

        output += "IP addresses\n";
        // IPv4 and IPv6 both included; local addresses are kept.
        output += Extract.runIp(input, [true, true, false]);

        output += "\nEmail addresses\n";
        output += Extract.runEmail(input, []);

        output += "\nMAC addresses\n";
        output += Extract.runMac(input, []);

        output += "\nURLs\n";
        output += Extract.runUrls(input, []);

        output += "\nDomain names\n";
        output += Extract.runDomains(input, []);

        output += "\nFile paths\n";
        // Windows and UNIX paths both included.
        output += Extract.runFilePaths(input, [true, true]);

        output += "\nDates\n";
        output += Extract.runDates(input, []);

        return output;
    },

};
export default Extract;