From a19b02aa8c6bcf64eddbfec61cf503d66ced9564 Mon Sep 17 00:00:00 2001 From: n1474335 Date: Tue, 15 Aug 2017 16:44:45 +0000 Subject: [PATCH] Updated URL regexes to match more unescaped special characters --- src/core/operations/Extract.js | 4 ++-- src/core/operations/StrUtils.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/operations/Extract.js b/src/core/operations/Extract.js index 080da27f..3f9d2d29 100755 --- a/src/core/operations/Extract.js +++ b/src/core/operations/Extract.js @@ -170,9 +170,9 @@ const Extract = { protocol = "[A-Z]+://", hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+", port = ":\\d+", - path = "/[^.!,?;\"'<>()\\[\\]{}\\s\\x7F-\\xFF]*"; + path = "/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*"; - path += "(?:[.!,?]+[^.!,?;\"'<>()\\[\\]{}\\s\\x7F-\\xFF]+)*"; + path += "(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*"; const regex = new RegExp(protocol + hostname + "(?:" + port + ")?(?:" + path + ")?", "ig"); return Extract._search(input, regex, null, displayTotal); diff --git a/src/core/operations/StrUtils.js b/src/core/operations/StrUtils.js index 698e7eef..5df56ca2 100755 --- a/src/core/operations/StrUtils.js +++ b/src/core/operations/StrUtils.js @@ -36,7 +36,7 @@ const StrUtils = { }, { name: "URL", - value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?;\"\\x27<>()\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?;\"\\x27<>()\\[\\]{}\\s\\x7F-\\xFF]+)*)?" + value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"\<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"\<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?" }, { name: "Domain",