Improved domain name regex

This commit is contained in:
n1474335 2017-09-06 16:43:30 +00:00
parent bf833991bf
commit 73823e3eb9
4 changed files with 4 additions and 8 deletions

View File

@@ -4,7 +4,6 @@ node_js:
install: npm install install: npm install
before_script: before_script:
- npm install -g grunt - npm install -g grunt
- if [ "$TRAVIS_TAG" ]; then git checkout -b travis-build; fi
script: script:
- grunt lint - grunt lint
- grunt test - grunt test

View File

@@ -2140,7 +2140,7 @@ const OperationConfig = {
] ]
}, },
"Extract domains": { "Extract domains": {
description: "Extracts domain names with common Top-Level Domains (TLDs).<br>Note that this will not include paths. Use <strong>Extract URLs</strong> to find entire URLs.", description: "Extracts domain names.<br>Note that this will not include paths. Use <strong>Extract URLs</strong> to find entire URLs.",
run: Extract.runDomains, run: Extract.runDomains,
inputType: "string", inputType: "string",
outputType: "string", outputType: "string",

View File

@@ -187,11 +187,8 @@ const Extract = {
* @returns {string} * @returns {string}
*/ */
runDomains: function(input, args) { runDomains: function(input, args) {
let displayTotal = args[0], const displayTotal = args[0],
protocol = "https?://", regex = /\b((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,63}\b/ig;
hostname = "[-\\w\\.]+",
tld = "\\.(?:com|net|org|biz|info|co|uk|onion|int|mobi|name|edu|gov|mil|eu|ac|ae|af|de|ca|ch|cn|cy|es|gb|hk|il|in|io|tv|me|nl|no|nz|ro|ru|tr|us|az|ir|kz|uz|pk)+",
regex = new RegExp("(?:" + protocol + ")?" + hostname + tld, "ig");
return Extract._search(input, regex, null, displayTotal); return Extract._search(input, regex, null, displayTotal);
}, },

View File

@@ -40,7 +40,7 @@ const StrUtils = {
}, },
{ {
name: "Domain", name: "Domain",
value: "(?:(https?):\\/\\/)?([-\\w.]+)\\.(com|net|org|biz|info|co|uk|onion|int|mobi|name|edu|gov|mil|eu|ac|ae|af|de|ca|ch|cn|cy|es|gb|hk|il|in|io|tv|me|nl|no|nz|ro|ru|tr|us|az|ir|kz|uz|pk)+" value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b"
}, },
{ {
name: "Windows file path", name: "Windows file path",