Improved domain name regex

This commit is contained in:
n1474335 2017-09-06 16:43:30 +00:00
parent bf833991bf
commit 73823e3eb9
4 changed files with 4 additions and 8 deletions

View File

@ -4,7 +4,6 @@ node_js:
install: npm install
before_script:
- npm install -g grunt
- if [ "$TRAVIS_TAG" ]; then git checkout -b travis-build; fi
script:
- grunt lint
- grunt test

View File

@ -2140,7 +2140,7 @@ const OperationConfig = {
]
},
"Extract domains": {
description: "Extracts domain names with common Top-Level Domains (TLDs).<br>Note that this will not include paths. Use <strong>Extract URLs</strong> to find entire URLs.",
description: "Extracts domain names.<br>Note that this will not include paths. Use <strong>Extract URLs</strong> to find entire URLs.",
run: Extract.runDomains,
inputType: "string",
outputType: "string",

View File

@ -187,11 +187,8 @@ const Extract = {
* @returns {string}
*/
runDomains: function(input, args) {
// Extracts domain names from `input` by handing a domain-matching regex to
// Extract._search; args[0] is forwarded as the display-total option.
//
// NOTE(review): this text looks like a rendered diff without +/- markers. The
// `let` statement below appears to be the removed (old) version and the `const`
// statement after it the replacement - both cannot coexist in one scope, since
// `displayTotal` and `regex` would be declared twice. Confirm against the repo.

// Older approach: optional "http://" or "https://" prefix, then a run of word
// characters, hyphens and dots, then one or more entries from a fixed TLD
// allow-list. Matching is global and case-insensitive.
let displayTotal = args[0],
protocol = "https?://",
hostname = "[-\\w\\.]+",
tld = "\\.(?:com|net|org|biz|info|co|uk|onion|int|mobi|name|edu|gov|mil|eu|ac|ae|af|de|ca|ch|cn|cy|es|gb|hk|il|in|io|tv|me|nl|no|nz|ro|ru|tr|us|az|ir|kz|uz|pk)+",
regex = new RegExp("(?:" + protocol + ")?" + hostname + tld, "ig");
// Newer approach: no TLD list and no protocol prefix. Reading the pattern:
//   \b                        - start at a word boundary
//   ( ... \.)+                - one or more labels, each followed by a dot
//     (?=[a-z0-9-]{1,63}\.)   - lookahead: the label before the dot is 1-63 chars
//     (xn--)?                 - optional "xn--" prefix
//     [a-z0-9]+(-[a-z0-9]+)*  - alphanumeric runs joined by single hyphens, so a
//                               label cannot start or end with a hyphen
//   [a-z]{2,63}\b             - final label: 2-63 letters, ending at a word boundary
// Flags: i (case-insensitive), g (global).
const displayTotal = args[0],
regex = /\b((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,63}\b/ig;
// The third argument is passed as null; its meaning is defined by
// Extract._search, which is not visible here - TODO confirm.
return Extract._search(input, regex, null, displayTotal);
},

View File

@ -40,7 +40,7 @@ const StrUtils = {
},
{
name: "Domain",
value: "(?:(https?):\\/\\/)?([-\\w.]+)\\.(com|net|org|biz|info|co|uk|onion|int|mobi|name|edu|gov|mil|eu|ac|ae|af|de|ca|ch|cn|cy|es|gb|hk|il|in|io|tv|me|nl|no|nz|ro|ru|tr|us|az|ir|kz|uz|pk)+"
value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b"
},
{
name: "Windows file path",