From 81e1abd68236623cb7d46ee08eb69a5d1c2e200a Mon Sep 17 00:00:00 2001 From: sw5678 <151949597+sw5678@users.noreply.github.com> Date: Mon, 11 Mar 2024 16:57:28 +0000 Subject: [PATCH 1/3] Improving efficiency of RAKE --- src/core/operations/RAKE.mjs | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/core/operations/RAKE.mjs b/src/core/operations/RAKE.mjs index d1165b51..d54143ae 100644 --- a/src/core/operations/RAKE.mjs +++ b/src/core/operations/RAKE.mjs @@ -101,22 +101,17 @@ class RAKE extends Operation { phrases = phrases.filter(subArray => subArray.length > 0); // Remove duplicate phrases - const uniquePhrases = [...new Set(phrases.map(function (phrase) { - return phrase.join(" "); - }))]; - phrases = uniquePhrases.map(function (phrase) { - return phrase.split(" "); - }); - + phrases = phrases.unique(); + // Generate word_degree_matrix and populate - const wordDegreeMatrix = Array.from(Array(tokens.length), _ => Array(tokens.length).fill(0)); - phrases.forEach(function (phrase) { - phrase.forEach(function (word1) { - phrase.forEach(function (word2) { - wordDegreeMatrix[tokens.indexOf(word1)][tokens.indexOf(word2)]++; - }); - }); - }); + const wordDegreeMatrix = Array(tokens.length).fill().map(() => Array(tokens.length).fill(0)); + for (let p=0; p < phrases.length; p++) { + for (let w1=0; w1 < phrases[p].length; w1++) { + for (let w2=0; w2 < phrases[p].length; w2++) { + wordDegreeMatrix[tokens.indexOf(phrases[p][w1])][tokens.indexOf(phrases[p][w2])]++; + } + } + } // Calculate degree score for each token const degreeScores = Array(tokens.length).fill(0); From 2191d20fb582e4551ec578f606d6a0098a4a4b93 Mon Sep 17 00:00:00 2001 From: sw5678 <151949597+sw5678@users.noreply.github.com> Date: Fri, 12 Apr 2024 10:40:33 +0100 Subject: [PATCH 2/3] Removed trailing whitespace --- tests/operations/tests/RAKE.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/operations/tests/RAKE.mjs 
b/tests/operations/tests/RAKE.mjs index 8164ca01..fe718076 100644 --- a/tests/operations/tests/RAKE.mjs +++ b/tests/operations/tests/RAKE.mjs @@ -1,6 +1,6 @@ /** * RAKE, Rapid Automatic Keyword Extraction tests. - * + * * @author sw5678 * @copyright Crown Copyright 2024 * @license Apache-2.0 From 0cfb67bd0636a4fd9c4239c91b8a3eeb18cff2f8 Mon Sep 17 00:00:00 2001 From: sw5678 <151949597+sw5678@users.noreply.github.com> Date: Fri, 12 Apr 2024 11:27:29 +0100 Subject: [PATCH 3/3] Improved readability and efficiency of RAKE --- src/core/operations/RAKE.mjs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/operations/RAKE.mjs b/src/core/operations/RAKE.mjs index d54143ae..3dd58c50 100644 --- a/src/core/operations/RAKE.mjs +++ b/src/core/operations/RAKE.mjs @@ -105,10 +105,10 @@ class RAKE extends Operation { // Generate word_degree_matrix and populate const wordDegreeMatrix = Array(tokens.length).fill().map(() => Array(tokens.length).fill(0)); - for (let p=0; p < phrases.length; p++) { - for (let w1=0; w1 < phrases[p].length; w1++) { - for (let w2=0; w2 < phrases[p].length; w2++) { - wordDegreeMatrix[tokens.indexOf(phrases[p][w1])][tokens.indexOf(phrases[p][w2])]++; + for (const phrase of phrases) { + for (const word1 of phrase) { + for (const word2 of phrase) { + wordDegreeMatrix[tokens.indexOf(word1)][tokens.indexOf(word2)]++; } } }