CyberChef/src/core/lib/FuzzyMatch.mjs

254 lines
7.2 KiB
JavaScript
Raw Normal View History

2021-02-05 18:54:57 +01:00
/**
* LICENSE
*
* This software is dual-licensed to the public domain and under the following
* license: you are granted a perpetual, irrevocable license to copy, modify,
* publish, and distribute this file as you see fit.
*
* VERSION
* 0.1.0 (2016-03-28) Initial release
*
* AUTHOR
* Forrest Smith
*
* CONTRIBUTORS
* Jørgen Tjernø - async helper
* Anurag Awasthi - updated to 0.2.0
*/
2021-02-12 14:51:51 +01:00
/**
 * Default scoring weights used by fuzzyMatch when the caller does not
 * supply its own. Bonuses are positive and penalties are negative; each is
 * added to the base score of a successful match.
 */
export const DEFAULT_WEIGHTS = {
    sequentialBonus: 15, // bonus for adjacent matches
    separatorBonus: 30, // bonus if match occurs after a separator ("_" or " ")
    camelBonus: 30, // bonus if match is uppercase and prev is lower
    firstLetterBonus: 15, // bonus if the first letter is matched

    leadingLetterPenalty: -5, // penalty applied for every letter in str before the first match
    maxLeadingLetterPenalty: -15, // maximum penalty for leading letters
    unmatchedLetterPenalty: -1 // penalty applied for every letter in str that is not matched
};
2021-02-05 18:54:57 +01:00
/**
 * Does a fuzzy search to find pattern inside a string.
 *
 * @param {string} pattern pattern to search for
 * @param {string} str string which is being searched
 * @param {boolean} global whether to search for all matches or just one
 * @param {Object} weights scoring weights, shaped like DEFAULT_WEIGHTS
 * @returns {Array} when global is false, a single result
 *      [found, score, matchIndexes]: a boolean which tells if pattern was
 *      found or not, a search score, and the indexes in str of the matched
 *      characters. When global is true, an array of such results (possibly
 *      empty), one per successive match; each search resumes right after
 *      the last matched index of the previous result.
 */
export function fuzzyMatch(pattern, str, global=false, weights=DEFAULT_WEIGHTS) {
    const recursionCount = 0;
    const recursionLimit = 10;
    const maxMatches = 256;

    // Runs one search starting at the given offset in str. Every search gets
    // its own empty match buffer so results never share state.
    const searchFrom = (strStartIndex) => fuzzyMatchRecursive(
        pattern,
        str,
        0 /* patternCurIndex */,
        strStartIndex /* strCurrIndex */,
        null /* srcMatches */,
        [] /* matches */,
        maxMatches,
        0 /* nextMatch */,
        recursionCount,
        recursionLimit,
        weights
    );

    if (!global) {
        return searchFrom(0);
    }

    // Return all matches
    const results = [];
    let strCurrIndex = 0;
    for (;;) {
        const [foundMatch, score, idxs] = searchFrom(strCurrIndex);
        if (!foundMatch) break;
        results.push([foundMatch, score, [...idxs]]);
        // Continue searching after the last character of this match.
        strCurrIndex = idxs[idxs.length - 1] + 1;
    }
    return results;
}
/**
 * Recursive helper function for fuzzyMatch.
 *
 * Walks str from strCurrIndex, greedily matching the remaining pattern
 * characters case-insensitively. At every character hit it also recurses
 * with that hit skipped, and finally returns whichever candidate (the greedy
 * one or the best recursive one) scored higher.
 *
 * @param {string} pattern pattern to search for
 * @param {string} str string which is being searched
 * @param {number} patternCurIndex index of the next pattern character to match
 * @param {number} strCurrIndex index in str at which to start scanning
 * @param {?number[]} srcMatches match indexes found by the caller so far
 *      (null for the top-level call); copied into matches on the first hit
 * @param {number[]} matches buffer that receives the matched str indexes
 * @param {number} maxMatches maximum number of matched characters allowed
 * @param {number} nextMatch number of entries of matches already in use
 * @param {number} recursionCount counter compared against recursionLimit; it
 *      is a plain number passed by value, so every branch counts on its own
 * @param {number} recursionLimit value at which the counter stops recursion
 * @param {Object} weights scoring weights (same keys as DEFAULT_WEIGHTS)
 * @returns {Array} [matched, score, matchIndexes]
 */
function fuzzyMatchRecursive(
    pattern,
    str,
    patternCurIndex,
    strCurrIndex,
    srcMatches,
    matches,
    maxMatches,
    nextMatch,
    recursionCount,
    recursionLimit,
    weights
) {
    let outScore = 0;

    // Return if recursion limit is reached.
    if (++recursionCount >= recursionLimit) {
        return [false, outScore, []];
    }

    // Return if we reached ends of strings.
    if (patternCurIndex === pattern.length || strCurrIndex === str.length) {
        return [false, outScore, []];
    }

    // Recursion params
    let recursiveMatch = false;
    let bestRecursiveMatches = [];
    let bestRecursiveScore = 0;

    // Loop through pattern and str looking for a match.
    let firstMatch = true;
    while (patternCurIndex < pattern.length && strCurrIndex < str.length) {
        // Match found.
        if (pattern[patternCurIndex].toLowerCase() === str[strCurrIndex].toLowerCase()) {
            if (nextMatch >= maxMatches) {
                return [false, outScore, []];
            }

            if (firstMatch && srcMatches) {
                matches = [...srcMatches];
                firstMatch = false;
            }

            // Explore the alternative where this hit is skipped. The callee
            // gets a fresh, empty scratch buffer; it replaces that buffer with
            // a copy of our matches (its srcMatches) on its own first hit.
            const [matched, recursiveScore, recursiveMatches] = fuzzyMatchRecursive(
                pattern,
                str,
                patternCurIndex,
                strCurrIndex + 1,
                matches,
                [],
                maxMatches,
                nextMatch,
                recursionCount,
                recursionLimit,
                weights
            );

            if (matched) {
                // Pick best recursive score.
                if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
                    bestRecursiveMatches = [...recursiveMatches];
                    bestRecursiveScore = recursiveScore;
                }
                recursiveMatch = true;
            }

            matches[nextMatch++] = strCurrIndex;
            ++patternCurIndex;
        }
        ++strCurrIndex;
    }

    // The greedy scan only succeeds if every pattern character was consumed.
    if (patternCurIndex !== pattern.length) {
        return [false, outScore, matches];
    }

    outScore = 100;

    // Apply leading letter penalty, capped at maxLeadingLetterPenalty
    let penalty = weights.leadingLetterPenalty * matches[0];
    penalty =
        penalty < weights.maxLeadingLetterPenalty ?
            weights.maxLeadingLetterPenalty :
            penalty;
    outScore += penalty;

    // Apply unmatched penalty
    const unmatched = str.length - nextMatch;
    outScore += weights.unmatchedLetterPenalty * unmatched;

    // Apply ordering bonuses
    for (let i = 0; i < nextMatch; i++) {
        const currIdx = matches[i];

        if (i > 0) {
            const prevIdx = matches[i - 1];
            if (currIdx === prevIdx + 1) {
                outScore += weights.sequentialBonus;
            }
        }

        // Check for bonuses based on neighbor character value.
        if (currIdx > 0) {
            // Camel case: previous character is lowercase, this one uppercase
            const neighbor = str[currIdx - 1];
            const curr = str[currIdx];
            if (
                neighbor !== neighbor.toUpperCase() &&
                curr !== curr.toLowerCase()
            ) {
                outScore += weights.camelBonus;
            }
            const isNeighbourSeparator = neighbor === "_" || neighbor === " ";
            if (isNeighbourSeparator) {
                outScore += weights.separatorBonus;
            }
        } else {
            // First letter
            outScore += weights.firstLetterBonus;
        }
    }

    // Return best result
    if (recursiveMatch && bestRecursiveScore > outScore) {
        // Recursive score is better than "this"
        return [true, bestRecursiveScore, bestRecursiveMatches];
    }

    // "this" score is better than recursive
    return [true, outScore, matches];
}
/**
 * Turns a list of match indexes into a list of match ranges.
 *
 * Consecutive (or repeated) indexes are merged into a single
 * [start, length] pair, e.g. [0, 1, 2, 5] becomes [[0, 3], [5, 1]].
 *
 * @author n1474335 [n1474335@gmail.com]
 * @param {number[]} matches match indexes, in the order they were found
 * @returns {number[][]} list of [start, length] ranges; empty when there are
 *      no matches
 */
export function calcMatchRanges(matches) {
    // Nothing matched: there are no ranges to report.
    if (matches == null || matches.length === 0) return [];

    const ranges = [];
    let start = matches[0];
    let curr = start;

    matches.forEach(m => {
        if (m === curr || m === curr + 1) {
            // Still inside the current run.
            curr = m;
        } else {
            // Gap found: close the current run and open a new one at m.
            ranges.push([start, curr - start + 1]);
            start = m;
            curr = m;
        }
    });

    // Close the final run.
    ranges.push([start, curr - start + 1]);
    return ranges;
}