diff --git a/src/core/Utils.mjs b/src/core/Utils.mjs index 9c687ee4..698af68c 100755 --- a/src/core/Utils.mjs +++ b/src/core/Utils.mjs @@ -704,8 +704,21 @@ class Utils { * Utils.stripHtmlTags("
Test
"); */ static stripHtmlTags(htmlStr, removeScriptAndStyle=false) { + /** + * Recursively remove a pattern from a string until there are no more matches. + * Avoids incomplete sanitization e.g. "aabcbc".replace(/abc/g, "") === "abc" + * + * @param {RegExp} pattern + * @param {string} str + * @returns {string} + */ + function recursiveRemove(pattern, str) { + const newStr = str.replace(pattern, ""); + return newStr.length === str.length ? newStr : recursiveRemove(pattern, newStr); + } + if (removeScriptAndStyle) { - htmlStr = htmlStr.replace(/<(script|style)[^>]*>.*?<\/(script|style)>/gi, ""); + htmlStr = recursiveRemove(/<(script|style)[^>]*>.*?<\/(script|style)>/gi, htmlStr); } return htmlStr.replace(/<[^>]+>/g, ""); } diff --git a/src/web/HTMLOperation.mjs b/src/web/HTMLOperation.mjs index df5de5a8..04e9b3cc 100755 --- a/src/web/HTMLOperation.mjs +++ b/src/web/HTMLOperation.mjs @@ -6,6 +6,7 @@ import HTMLIngredient from "./HTMLIngredient.mjs"; import Utils from "../core/Utils.mjs"; +import url from "url"; /** @@ -147,22 +148,29 @@ class HTMLOperation { /** * Given a URL for a Wikipedia (or other wiki) page, this function returns a link to that page. * - * @param {string} url + * @param {string} urlStr * @returns {string} */ -function titleFromWikiLink(url) { - const splitURL = url.split("/"); - if (!splitURL.includes("wikipedia.org") && !splitURL.includes("forensicswiki.xyz")) { - // Not a wiki link, return full URL - return `More Informationopen_in_new`; +function titleFromWikiLink(urlStr) { + const urlObj = url.parse(urlStr); + let wikiName = "", + pageTitle = ""; + + switch (urlObj.host) { + case "forensicswiki.xyz": + wikiName = "Forensics Wiki"; + pageTitle = urlObj.query.substr(6).replace(/_/g, " "); // Chop off 'title=' + break; + case "wikipedia.org": + wikiName = "Wikipedia"; + pageTitle = urlObj.pathname.substr(6).replace(/_/g, " "); // Chop off '/wiki/' + break; + default: + // Not a wiki link, return full URL + return `More Informationopen_in_new`; } - const wikiName = splitURL.includes("forensicswiki.xyz") ? "Forensics Wiki" : "Wikipedia"; - - const pageTitle = decodeURIComponent(splitURL[splitURL.length - 1]) - .replace(/_/g, " ") - .replace(/index\.php\?title=/g, ""); - return `${pageTitle}open_in_new on ${wikiName}`; + return `${pageTitle}open_in_new on ${wikiName}`; } export default HTMLOperation;