Fixed incomplete multi-character sanitization and incomplete URL substring sanitization issues.

This commit is contained in:
n1474335 2021-02-10 17:41:39 +00:00
parent 530836876f
commit 170e564319
2 changed files with 34 additions and 13 deletions

View File

@ -704,8 +704,21 @@ class Utils {
* Utils.stripHtmlTags("<div>Test</div>");
*/
static stripHtmlTags(htmlStr, removeScriptAndStyle=false) {
/**
* Recursively remove a pattern from a string until there are no more matches.
* Avoids incomplete sanitization e.g. "aabcbc".replace(/abc/g, "") === "abc"
*
* @param {RegExp} pattern
* @param {string} str
* @returns {string}
*/
function recursiveRemove(pattern, str) {
const newStr = str.replace(pattern, "");
return newStr.length === str.length ? newStr : recursiveRemove(pattern, newStr);
}
if (removeScriptAndStyle) {
htmlStr = htmlStr.replace(/<(script|style)[^>]*>.*?<\/(script|style)>/gi, "");
htmlStr = recursiveRemove(/<(script|style)[^>]*>.*?<\/(script|style)>/gi, htmlStr);
}
return htmlStr.replace(/<[^>]+>/g, "");
}

View File

@ -6,6 +6,7 @@
import HTMLIngredient from "./HTMLIngredient.mjs";
import Utils from "../core/Utils.mjs";
import url from "url";
/**
@ -147,22 +148,29 @@ class HTMLOperation {
/**
 * Given a URL for a Wikipedia (or other wiki) page, this function returns a link to that page.
 * URLs whose host is not one of the recognised wikis get a generic "More Information" link.
 *
 * @param {string} urlStr - Address of the page to link to.
 * @returns {string} HTML anchor markup, followed by " on <wiki name>" for recognised wikis.
 */
function titleFromWikiLink(urlStr) {
    const urlObj = url.parse(urlStr);
    let wikiName = "",
        pageTitle = "";

    // The parsed host is compared exactly; searching the raw string for a wiki
    // domain would also accept URLs that merely contain that domain somewhere.
    switch (urlObj.host) {
        case "forensicswiki.xyz":
            wikiName = "Forensics Wiki";
            // query is null when the URL has no "?..." part
            pageTitle = (urlObj.query || "").slice(6).replace(/_/g, " "); // Chop off 'title='
            break;
        case "wikipedia.org":
            wikiName = "Wikipedia";
            pageTitle = (urlObj.pathname || "").slice(6).replace(/_/g, " "); // Chop off '/wiki/'
            break;
        default:
            // Not a wiki link, return full URL
            return `<a href='${urlStr}' target='_blank'>More Information<i class='material-icons inline-icon'>open_in_new</i></a>`;
    }

    return `<a href='${urlObj.href}' target='_blank'>${pageTitle}<i class='material-icons inline-icon'>open_in_new</i></a> on ${wikiName}`;
}
export default HTMLOperation;