Skip to content

Commit

Permalink
feat(post): add alias for compound street names with abbreviated generic
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed Jan 13, 2022
1 parent 94a7f30 commit 2cb45c1
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 31 deletions.
23 changes: 23 additions & 0 deletions post/_contractions_abbreviated.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"DEU": {
"platz": "pl",
"markt": "mkt",
"straße": "str",
"strasse": "str"
},
"CHE": {
"platz": "pl",
"markt": "mkt",
"straße": "str",
"strasse": "str"
},
"AUT": {
"platz": "pl",
"markt": "mkt",
"straße": "str",
"strasse": "str"
},
"NLD": {
"straat": "str"
}
}
46 changes: 24 additions & 22 deletions post/seperable_street_names.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ const _ = require('lodash');
const TARGET_LAYERS = [ 'street', 'address', 'intersection' ];
const expansions = require('./_expansions.json');
const contractions = require('./_contractions.json');
const contractions_abbreviated = require('./_contractions_abbreviated.json');

function expand(str, mapping) {
let tokens = str.split(' ');
Expand Down Expand Up @@ -38,61 +39,55 @@ function contract(str, mapping) {
/**
 * Index expanded variants of a document's names as aliases.
 *
 * Handles three fields in turn: the 'default' name, the 'street' address part
 * and the 'cross_street' address part. Each value is passed through
 * expand(value, mapping); when the result is a non-empty string that differs
 * from the input, it is registered with doc.setNameAlias() (name) or
 * doc.setAddressAlias() (address parts).
 *
 * NOTE(review): this span is a rendered commit diff without +/- markers, so
 * two versions of each section appear back to back: a single-value form
 * (doc.getName() / doc.getAddress() guarded by an `if`) and an iterating form
 * (_.castArray(_.get(...)).forEach). Presumably the former are the removed
 * lines and the latter the added ones; confirm against the repository before
 * treating this text as runnable code.
 *
 * @param {Object} doc - document to update; values are read from it and
 *   aliases are written back through setNameAlias / setAddressAlias.
 * @param {Object} mapping - passed unchanged to expand(); post() supplies the
 *   per-country entry of the expansions table.
 */
function expandAllFields(doc, mapping){

// index expanded version of default name
const name = doc.getName('default');
if (_.isString(name) && !_.isEmpty(name)) {
_.castArray(_.get(doc.name, 'default', [])).forEach(name => {
const expanded = expand(name, mapping);
if (_.isString(expanded) && !_.isEmpty(expanded) && (name !== expanded)) {
doc.setNameAlias('default', expanded);
}
}
});

// index expanded version of street name
const street = doc.getAddress('street');
if (_.isString(street) && !_.isEmpty(street)) {
_.castArray(_.get(doc.address_parts, 'street', [])).forEach(street => {
const expanded = expand(street, mapping);
if (_.isString(expanded) && !_.isEmpty(expanded) && (street !== expanded)) {
doc.setAddressAlias('street', expanded);
}
}
});

// index expanded version of cross_street name
const cross_street = doc.getAddress('cross_street');
if (_.isString(cross_street) && !_.isEmpty(cross_street)) {
_.castArray(_.get(doc.address_parts, 'cross_street', [])).forEach(cross_street => {
const expanded = expand(cross_street, mapping);
if (_.isString(expanded) && !_.isEmpty(expanded) && (cross_street !== expanded)) {
doc.setAddressAlias('cross_street', expanded);
}
}
});
}

/**
 * Index contracted variants of a document's names as aliases.
 *
 * Handles three fields in turn: the 'default' name, the 'street' address part
 * and the 'cross_street' address part. Each value is passed through
 * contract(value, mapping); when the result is a non-empty string that differs
 * from the input, it is registered with doc.setNameAlias() (name) or
 * doc.setAddressAlias() (address parts).
 *
 * NOTE(review): this span is a rendered commit diff without +/- markers, so
 * two versions of each section appear back to back: a single-value form
 * (doc.getName() / doc.getAddress() guarded by an `if`) and an iterating form
 * (_.castArray(_.get(...)).forEach). Presumably the former are the removed
 * lines and the latter the added ones; confirm against the repository before
 * treating this text as runnable code.
 *
 * @param {Object} doc - document to update; values are read from it and
 *   aliases are written back through setNameAlias / setAddressAlias.
 * @param {Object} mapping - passed unchanged to contract(); post() calls this
 *   once with the per-country entry of the contractions table and once with
 *   the per-country entry of the abbreviated contractions table.
 */
function contractAllFields(doc, mapping) {

// index contracted version of default name
const name = doc.getName('default');
if (_.isString(name) && !_.isEmpty(name)) {
_.castArray(_.get(doc.name, 'default', [])).forEach(name => {
const contracted = contract(name, mapping);
if (_.isString(contracted) && !_.isEmpty(contracted) && (name !== contracted)) {
doc.setNameAlias('default', contracted);
}
}
});

// index contracted version of street name
const street = doc.getAddress('street');
if (_.isString(street) && !_.isEmpty(street)) {
_.castArray(_.get(doc.address_parts, 'street', [])).forEach(street => {
const contracted = contract(street, mapping);
if (_.isString(contracted) && !_.isEmpty(contracted) && (street !== contracted)) {
doc.setAddressAlias('street', contracted);
}
}
});

// index contracted version of cross_street name
const cross_street = doc.getAddress('cross_street');
if (_.isString(cross_street) && !_.isEmpty(cross_street)) {
_.castArray(_.get(doc.address_parts, 'cross_street', [])).forEach(cross_street => {
const contracted = contract(cross_street, mapping);
if (_.isString(contracted) && !_.isEmpty(contracted) && (cross_street !== contracted)) {
doc.setAddressAlias('cross_street', contracted);
}
}
});
}

function post(doc) {
Expand All @@ -101,20 +96,27 @@ function post(doc) {
if( !TARGET_LAYERS.includes( doc.getLayer() ) ) { return; }

// detect document country code
let docCountryCode = _.get(doc, 'parent.country_a[0]');
let docCountryCode = _.get(doc, 'parent.country_a[0]') || _.get(doc, 'parent.dependency_a[0]');
if( !_.isString(docCountryCode) || docCountryCode.length !== 3 ) { return; }
docCountryCode = docCountryCode.toUpperCase();

// expansions
let mapping_expansions = expansions[docCountryCode.toUpperCase()];
const mapping_expansions = expansions[docCountryCode];
if( _.isObject( mapping_expansions ) ) {
expandAllFields(doc, mapping_expansions);
}

// contractions
let mapping_contractions = contractions[docCountryCode.toUpperCase()];
const mapping_contractions = contractions[docCountryCode];
if( _.isObject( mapping_contractions ) ) {
contractAllFields(doc, mapping_contractions);
}

// abbreviated contractions
const mapping_contractions_abbr = contractions_abbreviated[docCountryCode];
if (_.isObject(mapping_contractions_abbr)) {
contractAllFields(doc, mapping_contractions_abbr);
}
}

module.exports = {
Expand All @@ -125,4 +127,4 @@ module.exports = {
contractions: contractions,
contract: contract,
contractAllFields: contractAllFields
};
};
72 changes: 63 additions & 9 deletions test/post/seperable_street_names.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@

var Document = require('../../Document');
var ssn = require('../../post/seperable_street_names');
const _ = require('lodash');
const Document = require('../../Document');
const ssn = require('../../post/seperable_street_names');
const dedupe = require('../../post/deduplication');

module.exports.tests = {};

module.exports.tests.expand = function (test) {
test('expand DEU', function (t) {
test('expand DEU', function (t) {
t.equals('Example Weg', ssn.expand('Examplew.', ssn.expansions.DEU) );
t.equals('Example Weg', ssn.expand('Exampleweg', ssn.expansions.DEU) );
t.equals('Example Quelle', ssn.expand('Exampleq.', ssn.expansions.DEU) );
Expand Down Expand Up @@ -180,17 +181,23 @@ module.exports.tests.functional = function (test) {

// name aliases defined
t.deepEqual(doc.getNameAliases('default'), [
'Example Straße & Cross Platz'
'Example Straße & Cross Platz',
'Examplestraße & Crossplatz',
'Examplestr & Crosspl'
]);

// street aliases defined
t.deepEqual(doc.getAddressAliases('street'), [
'Example Straße',
'Examplestraße',
'Examplestr'
]);

// cross_street aliases defined
t.deepEqual(doc.getAddressAliases('cross_street'), [
'Cross Platz',
'Crossplatz',
'Crosspl'
]);

t.end();
Expand All @@ -208,17 +215,20 @@ module.exports.tests.functional = function (test) {

// name aliases defined
t.deepEqual(doc.getNameAliases('default'), [
'Examplestraße & Crossplatz'
'Examplestraße & Crossplatz',
'Examplestr & Crosspl'
]);

// street aliases defined
t.deepEqual(doc.getAddressAliases('street'), [
'Examplestraße',
'Examplestr'
]);

// cross_street aliases defined
t.deepEqual(doc.getAddressAliases('cross_street'), [
'Crossplatz',
'Crosspl'
]);

t.end();
Expand All @@ -235,12 +245,14 @@ module.exports.tests.functional = function (test) {

// name aliases defined
t.deepEqual(doc.getNameAliases('default'), [
'Eberswalderstraße'
'Eberswalderstraße',
'Eberswalderstr'
]);

// street aliases defined
t.deepEqual(doc.getAddressAliases('street'), [
'Eberswalderstraße'
'Eberswalderstraße',
'Eberswalderstr'
]);

t.end();
Expand All @@ -259,17 +271,23 @@ module.exports.tests.functional = function (test) {
// name aliases defined
t.deepEqual(doc.getNameAliases('default'), [
'Example Straße & Cross Platz',
'Examplestraße & Crossplatz'
'Examplestraße & Crossplatz',
'Examplestraße & Crossplatz',
'Examplestr & Crossplatz',
'Examplestr & Crosspl'
]);

// street aliases defined
t.deepEqual(doc.getAddressAliases('street'), [
'Examplestraße',
'Examplestr'
]);

// cross_street aliases defined
t.deepEqual(doc.getAddressAliases('cross_street'), [
'Cross Platz',
'Crossplatz',
'Crosspl'
]);

t.end();
Expand All @@ -292,6 +310,42 @@ module.exports.tests.functional = function (test) {
t.doesNotThrow(() => ssn.post(doc));
t.end();
});

test('germanic separable street names', function (t) {

let generate = (input) => {
var doc = new Document('mysource', 'street', 'myid');
doc.addParent('country', 'Germany', '1001', 'DEU');
doc.setName('default', input);
doc.setAddress('street', input);
doc.setAddress('cross_street', input);
ssn.post(doc);
dedupe(doc);

return doc;
};

// test all permutations expand to all forms
// Separated / Compounded + Abbreviated / Compounded Non-Abbreviated
// note: Separated tokens are easily handled by elasticsearch synonyms
// and so do not require explicit substitution here.
t.deepEqual(_.castArray(generate('Foostrasse').name.default), ['Foostrasse', 'Foo Strasse', 'Foostr']);
t.deepEqual(_.castArray(generate('Foostraße').name.default), ['Foostraße', 'Foo Straße', 'Foostr']);
t.deepEqual(_.castArray(generate('Foostr.').name.default), ['Foostr.', 'Foo Straße', 'Foostraße']);
t.deepEqual(_.castArray(generate('Foostr').name.default), ['Foostr', 'Foo Straße', 'Foostraße']);
t.deepEqual(_.castArray(generate('Foo Strasse').name.default), ['Foo Strasse', 'Foostrasse', 'Foostr']);
t.deepEqual(_.castArray(generate('Foo Straße').name.default), ['Foo Straße', 'Foostraße', 'Foostr']);

// note: these forms with the abbreviated generic are not handled within this script.
// I considered adding synonym substitution functionality but it's complex and better
// handled by https://github.com/pelias/openaddresses/pull/477
// note: as a general rule, names at index-time should be provided un-abbreviated but may
// be either abbreviated or un-abbreviated at search time.
// t.deepEqual(_.castArray(generate('Foo Str.').name.default), ['Foo Str.', 'Foostraße', 'Foostr']);
// t.deepEqual(_.castArray(generate('Foo Str').name.default), ['Foo Str', 'Foostraße', 'Foostr']);

t.end();
});
};

module.exports.all = function (tape, common) {
Expand Down

0 comments on commit 2cb45c1

Please sign in to comment.