CyberChef/src/core/lib/Extract.mjs

65 lines
1.6 KiB
JavaScript

/**
* Identifier extraction functions
*
* @author n1474335 [n1474335@gmail.com]
* @copyright Crown Copyright 2016
* @license Apache-2.0
*
*/
/**
* Runs search operations across the input data using regular expressions.
*
* @param {string} input
* @param {RegExp} searchRegex
* @param {RegExp} [removeRegex=null] - A regular expression defining results to remove from the
* final list
* @param {Function} [sortBy=null] - The sorting comparison function to apply
* @param {boolean} [unique=false] - Whether to unique the results
* @returns {string}
*/
export function search(input, searchRegex, removeRegex=null, sortBy=null, unique=false) {
let results = [];
let match;
while ((match = searchRegex.exec(input))) {
// Moves pointer when an empty string is matched (prevents infinite loop)
if (match.index === searchRegex.lastIndex) {
searchRegex.lastIndex++;
}
if (removeRegex && removeRegex.test(match[0]))
continue;
results.push(match[0]);
}
if (sortBy) {
results = results.sort(sortBy);
}
if (unique) {
results = results.unique();
}
return results;
}
/**
* URL regular expression
*/
const protocol = "[A-Z]+://",
hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+",
port = ":\\d+",
path = "/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*" +
"(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*";
export const URL_REGEX = new RegExp(protocol + hostname + "(?:" + port + ")?(?:" + path + ")?", "ig");
/**
* Domain name regular expression
*/
export const DOMAIN_REGEX = /\b((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,63}\b/ig;