diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 931715da89..d6b6498d3f 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -19,7 +19,7 @@ jobs:
- name: Install
run: |
npm install
- export NODE_OPTIONS=--max_old_space_size=2048
+ echo "NODE_OPTIONS=--max_old_space_size=2048" >> "$GITHUB_ENV" # GITHUB_ENV carries the value into later steps; an export (or an npm script doing one) dies with its own shell
- name: Lint
run: npx grunt lint
diff --git a/.github/workflows/pull_requests.yml b/.github/workflows/pull_requests.yml
index 9b21e385cf..379078b85b 100644
--- a/.github/workflows/pull_requests.yml
+++ b/.github/workflows/pull_requests.yml
@@ -18,7 +18,7 @@ jobs:
- name: Install
run: |
npm install
- export NODE_OPTIONS=--max_old_space_size=2048
+ echo "NODE_OPTIONS=--max_old_space_size=2048" >> "$GITHUB_ENV" # GITHUB_ENV carries the value into later steps; an export (or an npm script doing one) dies with its own shell
- name: Lint
run: npx grunt lint
diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml
index dda8fbef54..7985c99a4c 100644
--- a/.github/workflows/releases.yml
+++ b/.github/workflows/releases.yml
@@ -19,7 +19,7 @@ jobs:
- name: Install
run: |
npm install
- export NODE_OPTIONS=--max_old_space_size=2048
+ echo "NODE_OPTIONS=--max_old_space_size=2048" >> "$GITHUB_ENV" # GITHUB_ENV carries the value into later steps; an export (or an npm script doing one) dies with its own shell
- name: Lint
run: npx grunt lint
diff --git a/package.json b/package.json
index 75ce4876c4..8b4018ffee 100644
--- a/package.json
+++ b/package.json
@@ -173,6 +173,8 @@
"testuidev": "npx nightwatch --env=dev",
"lint": "npx grunt lint",
"postinstall": "npx grunt exec:fixCryptoApiImports",
- "newop": "node --experimental-modules src/core/config/scripts/newOperation.mjs"
+ "newop": "node --experimental-modules src/core/config/scripts/newOperation.mjs",
+ "getheapsize": "node -e 'console.log(`node heap limit = ${require(\"v8\").getHeapStatistics().heap_size_limit / (1024 * 1024)} Mb`)'",
+ "setheapsize": "export NODE_OPTIONS=--max_old_space_size=2048"
}
}
diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json
index 257f474206..3a5eb0d556 100755
--- a/src/core/config/Categories.json
+++ b/src/core/config/Categories.json
@@ -238,6 +238,7 @@
"Pad lines",
"Find / Replace",
"Regular expression",
+ "Fuzzy Match",
"Offset checker",
"Hamming Distance",
"Convert distance",
diff --git a/src/core/lib/FuzzySearch.mjs b/src/core/lib/FuzzyMatch.mjs
similarity index 68%
rename from src/core/lib/FuzzySearch.mjs
rename to src/core/lib/FuzzyMatch.mjs
index d24f3fe691..693527a513 100644
--- a/src/core/lib/FuzzySearch.mjs
+++ b/src/core/lib/FuzzyMatch.mjs
@@ -16,40 +16,72 @@
* Anurag Awasthi - updated to 0.2.0
*/
-const SEQUENTIAL_BONUS = 15; // bonus for adjacent matches
-const SEPARATOR_BONUS = 30; // bonus if match occurs after a separator
-const CAMEL_BONUS = 30; // bonus if match is uppercase and prev is lower
-const FIRST_LETTER_BONUS = 15; // bonus if the first letter is matched
+/**
+ * Default scoring weights, used by fuzzyMatch when the caller supplies none.
+ *
+ * Frozen because this one object is both exported and used as a default
+ * argument, so an accidental write would change scoring for every caller.
+ */
+export const DEFAULT_WEIGHTS = Object.freeze({
+    sequentialBonus: 15, // bonus for adjacent matches
+    separatorBonus: 30, // bonus if match occurs after a separator
+    camelBonus: 30, // bonus if match is uppercase and prev is lower
+    firstLetterBonus: 15, // bonus if the first letter is matched
-const LEADING_LETTER_PENALTY = -5; // penalty applied for every letter in str before the first match
-const MAX_LEADING_LETTER_PENALTY = -15; // maximum penalty for leading letters
-const UNMATCHED_LETTER_PENALTY = -1;
+    leadingLetterPenalty: -5, // penalty applied for every letter in str before the first match
+    maxLeadingLetterPenalty: -15, // maximum penalty for leading letters
+    unmatchedLetterPenalty: -1 // penalty applied for every letter in str that is not matched
+});
/**
* Does a fuzzy search to find pattern inside a string.
- * @param {*} pattern string pattern to search for
- * @param {*} str string string which is being searched
+ * @param {string} pattern pattern to search for
+ * @param {string} str string which is being searched
+ * @param {boolean} global whether to search for all matches or just one
+ * @param {Object} weights scoring weights; any key missing from the object
+ *          falls back to its value in DEFAULT_WEIGHTS
* @returns [boolean, number] a boolean which tells if pattern was
* found or not and a search score
+ *          Each result also carries the matched indices as a third element.
+ *          When global is true, an array holding one such result per match
+ *          is returned instead (empty if nothing matched).
*/
-export function fuzzyMatch(pattern, str) {
+export function fuzzyMatch(pattern, str, global=false, weights=DEFAULT_WEIGHTS) {
const recursionCount = 0;
const recursionLimit = 10;
const matches = [];
const maxMatches = 256;
- return fuzzyMatchRecursive(
- pattern,
- str,
- 0 /* patternCurIndex */,
- 0 /* strCurrIndex */,
- null /* srcMatces */,
- matches,
- maxMatches,
- 0 /* nextMatch */,
- recursionCount,
- recursionLimit
- );
+    // Copy the defaults and overlay the caller's values, so that a partial
+    // (or null) weights object cannot put undefined, and therefore NaN,
+    // into the score
+    const scoring = Object.assign({}, DEFAULT_WEIGHTS, weights);
+
+    if (!global) {
+        return fuzzyMatchRecursive(
+            pattern,
+            str,
+            0 /* patternCurIndex */,
+            0 /* strCurrIndex */,
+            null /* srcMatches */,
+            matches,
+            maxMatches,
+            0 /* nextMatch */,
+            recursionCount,
+            recursionLimit,
+            scoring
+        );
+    }
+
+    // Return all matches
+    const results = [];
+    let searchFrom = 0;
+
+    // Bounding the loop by the string length guarantees termination and
+    // skips a pointless search once no characters remain
+    while (searchFrom < str.length) {
+        const [foundMatch, score, idxs] = fuzzyMatchRecursive(
+            pattern,
+            str,
+            0 /* patternCurIndex */,
+            searchFrom /* strCurrIndex */,
+            null /* srcMatches */,
+            matches,
+            maxMatches,
+            0 /* nextMatch */,
+            recursionCount,
+            recursionLimit,
+            scoring
+        );
+        if (!foundMatch) break;
+
+        // Copy idxs in case it aliases the matches buffer, which is reused
+        // by the next search
+        results.push([foundMatch, score, [...idxs]]);
+        // Resume immediately after the last character of this match
+        searchFrom = idxs[idxs.length - 1] + 1;
+    }
+    return results;
}
/**
@@ -65,7 +97,8 @@ function fuzzyMatchRecursive(
maxMatches,
nextMatch,
recursionCount,
- recursionLimit
+ recursionLimit,
+ weights
) {
let outScore = 0;
@@ -110,7 +143,8 @@ function fuzzyMatchRecursive(
maxMatches,
nextMatch,
recursionCount,
- recursionLimit
+ recursionLimit,
+ weights
);
if (matched) {
@@ -134,16 +168,16 @@ function fuzzyMatchRecursive(
outScore = 100;
// Apply leading letter penalty
- let penalty = LEADING_LETTER_PENALTY * matches[0];
+ let penalty = weights.leadingLetterPenalty * matches[0];
penalty =
- penalty < MAX_LEADING_LETTER_PENALTY ?
- MAX_LEADING_LETTER_PENALTY :
+ penalty < weights.maxLeadingLetterPenalty ?
+ weights.maxLeadingLetterPenalty :
penalty;
outScore += penalty;
// Apply unmatched penalty
const unmatched = str.length - nextMatch;
- outScore += UNMATCHED_LETTER_PENALTY * unmatched;
+ outScore += weights.unmatchedLetterPenalty * unmatched;
// Apply ordering bonuses
for (let i = 0; i < nextMatch; i++) {
@@ -152,7 +186,7 @@ function fuzzyMatchRecursive(
if (i > 0) {
const prevIdx = matches[i - 1];
if (currIdx === prevIdx + 1) {
- outScore += SEQUENTIAL_BONUS;
+ outScore += weights.sequentialBonus;
}
}
@@ -165,15 +199,15 @@ function fuzzyMatchRecursive(
neighbor !== neighbor.toUpperCase() &&
curr !== curr.toLowerCase()
) {
- outScore += CAMEL_BONUS;
+ outScore += weights.camelBonus;
}
const isNeighbourSeparator = neighbor === "_" || neighbor === " ";
if (isNeighbourSeparator) {
- outScore += SEPARATOR_BONUS;
+ outScore += weights.separatorBonus;
}
} else {
// First letter
- outScore += FIRST_LETTER_BONUS;
+ outScore += weights.firstLetterBonus;
}
}
diff --git a/src/core/operations/FuzzyMatch.mjs b/src/core/operations/FuzzyMatch.mjs
new file mode 100644
index 0000000000..f7c9b3580b
--- /dev/null
+++ b/src/core/operations/FuzzyMatch.mjs
@@ -0,0 +1,120 @@
+/**
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2021
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation.mjs";
+import {fuzzyMatch, calcMatchRanges, DEFAULT_WEIGHTS} from "../lib/FuzzyMatch.mjs";
+
+/**
+ * Fuzzy Match operation
+ *
+ * Runs a global fuzzy search for a pattern over the input, using
+ * caller-adjustable scoring weights, and returns the input as HTML output.
+ */
+class FuzzyMatch extends Operation {
+
+    /**
+     * FuzzyMatch constructor
+     *
+     * Declares the search argument followed by one numeric argument per
+     * scoring weight, each defaulting to its value in DEFAULT_WEIGHTS.
+     */
+    constructor() {
+        super();
+
+        this.name = "Fuzzy Match";
+        this.module = "Default";
+        this.description = "Conducts a fuzzy search to find a pattern within the input based on weighted criteria.
e.g. A search for dpan
will match on Don't Panic
";
+        this.infoURL = "https://wikipedia.org/wiki/Fuzzy_matching_(computer-assisted_translation)";
+        this.inputType = "string";
+        this.outputType = "html";
+        this.args = [
+            {
+                name: "Search",
+                type: "binaryString",
+                value: ""
+            },
+            {
+                name: "Sequential bonus",
+                type: "number",
+                value: DEFAULT_WEIGHTS.sequentialBonus,
+                hint: "Bonus for adjacent matches"
+            },
+            {
+                name: "Separator bonus",
+                type: "number",
+                value: DEFAULT_WEIGHTS.separatorBonus,
+                hint: "Bonus if match occurs after a separator"
+            },
+            {
+                name: "Camel bonus",
+                type: "number",
+                value: DEFAULT_WEIGHTS.camelBonus,
+                hint: "Bonus if match is uppercase and previous is lower"
+            },
+            {
+                name: "First letter bonus",
+                type: "number",
+                value: DEFAULT_WEIGHTS.firstLetterBonus,
+                hint: "Bonus if the first letter is matched"
+            },
+            {
+                name: "Leading letter penalty",
+                type: "number",
+                value: DEFAULT_WEIGHTS.leadingLetterPenalty,
+                hint: "Penalty applied for every letter in the input before the first match"
+            },
+            {
+                name: "Max leading letter penalty",
+                type: "number",
+                value: DEFAULT_WEIGHTS.maxLeadingLetterPenalty,
+                hint: "Maximum penalty for leading letters"
+            },
+            {
+                name: "Unmatched letter penalty",
+                type: "number",
+                value: DEFAULT_WEIGHTS.unmatchedLetterPenalty
+            },
+        ];
+    }
+
+    /**
+     * Searches the input for every fuzzy match of the search string and
+     * builds the HTML output from the matched and unmatched segments.
+     *
+     * @param {string} input
+     * @param {Object[]} args - search string, then the seven weights in the
+     *      order they are declared in the constructor
+     * @returns {html} the rendered input, or "No matches." if nothing matched
+     */
+    run(input, args) {
+        const searchStr = args[0];
+        const weights = {
+            sequentialBonus: args[1],
+            separatorBonus: args[2],
+            camelBonus: args[3],
+            firstLetterBonus: args[4],
+            leadingLetterPenalty: args[5],
+            maxLeadingLetterPenalty: args[6],
+            unmatchedLetterPenalty: args[7]
+        };
+        const matches = fuzzyMatch(searchStr, input, true, weights);
+
+        // A global search always returns an array, so test its length: an
+        // empty array is truthy and would slip past a plain !matches check
+        if (matches.length === 0) {
+            return "No matches.";
+        }
+
+        let result = "", pos = 0, hlClass = "hl1";
+        // The first tuple element (the found flag) is always true for a
+        // returned match, so it is skipped rather than shadowing `matches`
+        matches.forEach(([, score, idxs]) => {
+            const matchRanges = calcMatchRanges(idxs);
+
+            matchRanges.forEach(([start, length], i) => {
+                // Input text is escaped because the output is rendered as HTML
+                result += escapeHtml(input.slice(pos, start));
+                if (i === 0) result += ``;
+                pos = start + length;
+                result += `${escapeHtml(input.slice(start, pos))}`;
+            });
+            result += "";
+            // Alternate the class so that neighbouring matches can be told apart
+            hlClass = hlClass === "hl1" ? "hl2" : "hl1";
+        });
+
+        // Append whatever follows the final match
+        result += escapeHtml(input.slice(pos, input.length));
+
+        return result;
+    }
+
+}
+
+/**
+ * Escapes the characters that are significant in HTML so that input text
+ * is displayed literally instead of being interpreted as markup.
+ *
+ * @param {string} text
+ * @returns {string}
+ */
+function escapeHtml(text) {
+    const entities = {
+        "&": "&amp;",
+        "<": "&lt;",
+        ">": "&gt;",
+        "\"": "&quot;",
+        "'": "&#x27;"
+    };
+    return text.replace(/[&<>"']/g, ch => entities[ch]);
+}
+
+export default FuzzyMatch;
diff --git a/src/core/operations/RegularExpression.mjs b/src/core/operations/RegularExpression.mjs
index 8771b55f95..1d8de9c4ef 100644
--- a/src/core/operations/RegularExpression.mjs
+++ b/src/core/operations/RegularExpression.mjs
@@ -185,7 +185,7 @@ class RegularExpression extends Operation {
* @param {boolean} captureGroups - Display each of the capture groups separately
* @returns {string}
*/
-function regexList (input, regex, displayTotal, matches, captureGroups) {
+function regexList(input, regex, displayTotal, matches, captureGroups) {
let output = "",
total = 0,
match;
@@ -225,7 +225,7 @@ function regexList (input, regex, displayTotal, matches, captureGroups) {
* @param {boolean} displayTotal
* @returns {string}
*/
-function regexHighlight (input, regex, displayTotal) {
+function regexHighlight(input, regex, displayTotal) {
let output = "",
title = "",
hl = 1,
diff --git a/src/web/waiters/OperationsWaiter.mjs b/src/web/waiters/OperationsWaiter.mjs
index 200ae5df8d..6efbab72c3 100755
--- a/src/web/waiters/OperationsWaiter.mjs
+++ b/src/web/waiters/OperationsWaiter.mjs
@@ -6,7 +6,7 @@
import HTMLOperation from "../HTMLOperation.mjs";
import Sortable from "sortablejs";
-import {fuzzyMatch, calcMatchRanges} from "../../core/lib/FuzzySearch.mjs";
+import {fuzzyMatch, calcMatchRanges} from "../../core/lib/FuzzyMatch.mjs";
/**