Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add alphanumeric postcodes post-processing script #158

Merged
merged 1 commit into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Document.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ function Document( source, layer, source_id ){
// define default post-processing scripts
this.addPostProcessingScript( require('./post/intersections') );
this.addPostProcessingScript( require('./post/seperable_street_names').post );
this.addPostProcessingScript( require('./post/alphanumeric_postcodes') );
this.addPostProcessingScript( require('./post/deduplication') );
this.addPostProcessingScript( require('./post/language_field_trimming') );
this.addPostProcessingScript( require('./post/popularity') );
Expand Down
50 changes: 50 additions & 0 deletions post/alphanumeric_postcodes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
const _ = require('lodash');
// the only layer this script applies to (compared against doc.getLayer())
const ADDRESS_LAYER_NAME = 'address';

// matches exactly four digits, optional whitespace, then two letters.
// capture groups: (1) the digits, (2) the whitespace (possibly empty), (3) the letters
const ALPHANUMERIC_POSTCODE = /^([0-9]{4})(\s*)([A-Za-z]{2})$/;

/**
* Alphanumeric postcodes post-processing script ensures that both the expanded
* and contracted version of alphanumeric postcodes are indexed.
*
* Without this script a postcode such as '1383GN' would not be matched to the
* query '1383'.
*
* The script is intended to detect these alphanumeric postcodes and index both
* permutations, ie. '1383GN' = ['1383GN', '1383 GN'].
*
* The inverse case should also be covered. ie. '1383 GN' = ['1383 GN', '1383GN'].
*
* Note: the regex is currently restrictive by design. The UK, for instance, uses
* alphanumeric postcodes in the format 'E81DN', which could cause errors when split
* with this method, so they are currently ignored. Future work should consider global
* postcode formats.
*
* Note: this script is intended to run *before* the 'deduplication' post processing
* script so that prior aliases don't generate duplicate terms.
*/

/**
 * Adds a single 'zip' address alias holding the alternate spelling of a
 * postcode which matches ALPHANUMERIC_POSTCODE: the contracted form when the
 * postcode contains whitespace, the expanded (single space) form otherwise.
 *
 * Documents outside the address layer, documents without a non-empty string
 * 'zip', and postcodes which do not match the regex are left untouched.
 *
 * @param {Object} doc - document to inspect; modified via doc.setAddressAlias()
 * @returns {undefined}
 */
function postcodes( doc ){

  // only apply to docs from the address layer
  if( doc.getLayer() !== ADDRESS_LAYER_NAME ){ return; }

  // ensure postcode is set
  const postcode = doc.getAddress('zip');
  if( !_.isString(postcode) || _.isEmpty(postcode) ){ return; }

  // ensure postcode is alphanumeric
  // note: String.prototype.match() returns null (not an empty array) when the
  // regex does not match, so the result must be null-checked before use.
  const matches = postcode.match(ALPHANUMERIC_POSTCODE);
  if( !matches ){ return; }

  // split the postcode in to its component parts
  const [ , numeric, spaces, alpha ] = matches;

  // the existing postcode is expanded when it contains whitespace, in which
  // case the contracted form is added as an alias, otherwise the expanded form.
  const alias = ( spaces.length > 0 ) ? `${numeric}${alpha}` : `${numeric} ${alpha}`;
  doc.setAddressAlias('zip', alias);
}

module.exports = postcodes;
6 changes: 5 additions & 1 deletion test/document/post.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@
const Document = require('../../Document');
const intersections = require('../../post/intersections');
const seperable_street_names = require('../../post/seperable_street_names').post;
const alphanumeric_postcodes = require('../../post/alphanumeric_postcodes');
const deduplication = require('../../post/deduplication');
const language_field_trimming = require('../../post/language_field_trimming');
const popularity = require('../../post/popularity');
const DEFAULT_SCRIPTS = [intersections, seperable_street_names, deduplication, language_field_trimming, popularity];
const DEFAULT_SCRIPTS = [
intersections, seperable_street_names, alphanumeric_postcodes,
deduplication, language_field_trimming, popularity
];

module.exports.tests = {};

Expand Down
76 changes: 76 additions & 0 deletions test/post/alphanumeric_postcodes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
const Document = require('../../Document');
const postcodes = require('../../post/alphanumeric_postcodes');

module.exports.tests = {};

module.exports.tests.alias = function(test) {

  // Shared scenario for both directions: running the script before a zip is
  // set adds nothing; running it after setting `zip` yields exactly one alias.
  const assertAliasGenerated = (t, zip, expectedAlias) => {
    const doc = new Document('mysource','address','myid');

    // zip not set
    postcodes(doc);
    t.deepEqual(doc.getAddressAliases('zip'), [], 'no alias set');

    // set postcode and run the script again
    doc.setAddress('zip', zip);
    postcodes(doc);
    t.deepEqual(doc.getAddressAliases('zip'), [expectedAlias], 'alias set');

    t.end();
  };

  // contracted input gains the expanded form
  test('expand', (t) => assertAliasGenerated(t, '1383GN', '1383 GN'));

  // expanded input gains the contracted form
  test('contract', (t) => assertAliasGenerated(t, '1383 GN', '1383GN'));
};

// Cases where the script must leave the document untouched. Each test runs the
// script before asserting, otherwise the empty alias list would only reflect a
// freshly constructed document and not the behaviour of the script.
module.exports.tests.noop = function(test) {
  test('noop: invalid layer != "address"', function(t) {
    const doc = new Document('mysource','not_address','myid');

    // set postcode
    doc.setAddress('zip', '1383GN');

    // run the script, it must ignore documents outside the address layer
    postcodes(doc);

    // no alias added
    t.deepEqual(doc.getAddressAliases('zip'), [], 'no alias set');

    t.end();
  });

  test('noop: postcode doesnt match regex', function(t) {
    const doc = new Document('mysource','address','myid');

    // set postcode
    doc.setAddress('zip', 'E81DN');

    // run the script, it must neither throw nor add an alias
    postcodes(doc);

    // no alias added
    t.deepEqual(doc.getAddressAliases('zip'), [], 'no alias set');

    t.end();
  });
};

// Entry point used by the test runner: registers every test group exported
// from this file, prefixing each test name with the path of the module under test.
module.exports.all = function (tape, common) {
  const groups = module.exports.tests;

  // wraps tape so that each test name carries this file's prefix
  const test = (name, testFunction) => tape('post/alphanumeric_postcodes: ' + name, testFunction);

  for( const groupName in groups ){
    groups[groupName](test, common);
  }
};
7 changes: 4 additions & 3 deletions test/run.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
var tape = require('tape');
var common = {};
const tape = require('tape');
const common = {};

var tests = [
const tests = [
require('./Document.js'),
require('./errors.js'),
require('./document/centroid.js'),
Expand All @@ -23,6 +23,7 @@ var tests = [
require('./document/toESDocument.js'),
require('./document/post.js'),
require('./post/intersections.js'),
require('./post/alphanumeric_postcodes.js'),
require('./post/deduplication.js'),
require('./post/seperable_street_names.js'),
require('./post/language_field_trimming.js'),
Expand Down
Loading