Updated URL regexes to match more unescaped special characters

This commit is contained in:
n1474335 2017-08-15 16:44:45 +00:00
parent cf1ba60a10
commit a19b02aa8c
2 changed files with 3 additions and 3 deletions

View File

@@ -170,9 +170,9 @@ const Extract = {
protocol = "[A-Z]+://",
hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+",
port = ":\\d+",
path = "/[^.!,?;\"'<>()\\[\\]{}\\s\\x7F-\\xFF]*";
path = "/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*";
path += "(?:[.!,?]+[^.!,?;\"'<>()\\[\\]{}\\s\\x7F-\\xFF]+)*";
path += "(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*";
const regex = new RegExp(protocol + hostname + "(?:" + port +
")?(?:" + path + ")?", "ig");
return Extract._search(input, regex, null, displayTotal);

View File

@@ -36,7 +36,7 @@ const StrUtils = {
},
{
name: "URL",
value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?;\"\\x27<>()\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?;\"\\x27<>()\\[\\]{}\\s\\x7F-\\xFF]+)*)?"
value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"\<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"\<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?"
},
{
name: "Domain",