From 73823e3eb9b3695a6777dc02879c286e14e061b4 Mon Sep 17 00:00:00 2001 From: n1474335 Date: Wed, 6 Sep 2017 16:43:30 +0000 Subject: [PATCH] Improved domain name regex --- .travis.yml | 1 - src/core/config/OperationConfig.js | 2 +- src/core/operations/Extract.js | 7 ++----- src/core/operations/StrUtils.js | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index c6bd204c..5f039562 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ node_js: install: npm install before_script: - npm install -g grunt - - if [ "$TRAVIS_TAG" ]; then git checkout -b travis-build; fi script: - grunt lint - grunt test diff --git a/src/core/config/OperationConfig.js b/src/core/config/OperationConfig.js index 4493bf9c..bc2bf399 100755 --- a/src/core/config/OperationConfig.js +++ b/src/core/config/OperationConfig.js @@ -2140,7 +2140,7 @@ const OperationConfig = { ] }, "Extract domains": { - description: "Extracts domain names with common Top-Level Domains (TLDs).
Note that this will not include paths. Use Extract URLs to find entire URLs.", + description: "Extracts domain names.
Note that this will not include paths. Use Extract URLs to find entire URLs.", run: Extract.runDomains, inputType: "string", outputType: "string", diff --git a/src/core/operations/Extract.js b/src/core/operations/Extract.js index 3f9d2d29..d822830a 100755 --- a/src/core/operations/Extract.js +++ b/src/core/operations/Extract.js @@ -187,11 +187,8 @@ const Extract = { * @returns {string} */ runDomains: function(input, args) { - let displayTotal = args[0], - protocol = "https?://", - hostname = "[-\\w\\.]+", - tld = "\\.(?:com|net|org|biz|info|co|uk|onion|int|mobi|name|edu|gov|mil|eu|ac|ae|af|de|ca|ch|cn|cy|es|gb|hk|il|in|io|tv|me|nl|no|nz|ro|ru|tr|us|az|ir|kz|uz|pk)+", - regex = new RegExp("(?:" + protocol + ")?" + hostname + tld, "ig"); + const displayTotal = args[0], + regex = /\b((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,63}\b/ig; return Extract._search(input, regex, null, displayTotal); }, diff --git a/src/core/operations/StrUtils.js b/src/core/operations/StrUtils.js index b771eb36..71823053 100755 --- a/src/core/operations/StrUtils.js +++ b/src/core/operations/StrUtils.js @@ -40,7 +40,7 @@ const StrUtils = { }, { name: "Domain", - value: "(?:(https?):\\/\\/)?([-\\w.]+)\\.(com|net|org|biz|info|co|uk|onion|int|mobi|name|edu|gov|mil|eu|ac|ae|af|de|ca|ch|cn|cy|es|gb|hk|il|in|io|tv|me|nl|no|nz|ro|ru|tr|us|az|ir|kz|uz|pk)+" + value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b" }, { name: "Windows file path",