Skip to content

Commit

Permalink
post: apply string normalization in deduplication script
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed Jan 10, 2022
1 parent 63ce210 commit 94a7f30
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 6 deletions.
20 changes: 14 additions & 6 deletions post/deduplication.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,24 @@
*/

const _ = require('lodash');
const prefixes = [ 'name', 'phrase', 'address_parts' ];
const prefixes = ['name', 'phrase', 'address_parts'];
// Matches every run of one or more consecutive full stops in a string.
const punctuation = /[\.]+/g;

// Produce the form of a value used when comparing for duplicates:
// strings are lowercased and have all full stops removed, while any
// non-string value is handed back untouched.
const normalize = (v) => {
  if (!_.isString(v)) {
    return v;
  }
  return _.replace(v.toLowerCase(), punctuation, '');
};

function deduplication( doc ){
// Equality predicate for de-duplication: both operands are passed
// through normalize() first (which only alters string values), and
// the normalized results are then compared with _.isEqual.
const comparitor = (value, other) => {
  const left = normalize(value);
  const right = normalize(other);
  return _.isEqual(left, right);
};

function deduplication(doc) {
prefixes.forEach(prefix => {
let index = doc[prefix];
if ( _.isPlainObject( index ) ){
for( let field in index ){
if (_.isPlainObject(index)) {
for (let field in index) {
let values = index[field];
if( _.isArray( values ) && values.length > 1 ){
index[field] = _.uniq(values);
if (_.isArray(values) && values.length > 1) {
index[field] = _.uniqWith(values, comparitor);
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions test/post/deduplication.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ module.exports.tests.dedupe = function (test) {
doc.setNameAlias('default', 'test');
doc.setNameAlias('default', 'test 2');
doc.setNameAlias('default', 'test');
doc.setNameAlias('default', '.Test.');

deduplication(doc);
t.deepEquals(doc.name.default, ['test', 'test 2']);
Expand All @@ -27,6 +28,7 @@ module.exports.tests.dedupe = function (test) {
doc.setAddressAlias('street', 'test');
doc.setAddressAlias('street', 'test 2');
doc.setAddressAlias('street', 'test');
doc.setAddressAlias('street', '..Test..');

deduplication(doc);
t.deepEquals(doc.address_parts.street, ['test', 'test 2']);
Expand All @@ -42,6 +44,7 @@ module.exports.tests.dedupe = function (test) {
doc.setNameAlias('default', 'test');
doc.setNameAlias('default', 'test 2');
doc.setNameAlias('default', 'test');
doc.setNameAlias('default', '...Te...st...');

deduplication(doc);
t.deepEquals(doc.phrase.default, ['test', 'test 2']);
Expand Down

0 comments on commit 94a7f30

Please sign in to comment.