Skip to content

Commit

Permalink
Merge pull request #153 from guardian/pf/rename-iptccat-to-ap
Browse files Browse the repository at this point in the history
Rename cat codes labelled 'iptccat' as 'apCat' instead
  • Loading branch information
bryophyta authored Feb 17, 2025
2 parents a4e5cbd + 6bffd96 commit f202256
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 12 deletions.
14 changes: 7 additions & 7 deletions ingestion-lambda/src/categoryCodes.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ describe('processFingerpostAPCategoryCodes', () => {
expect(processFingerpostAPCategoryCodes(['service:news'])).toEqual([]);
});

it('should return simple category codes as they were received', () => {
it('should return simple codes labelled "iptccat" as simple "apCat" codes', () => {
expect(
processFingerpostAPCategoryCodes(['iptccat:a', 'iptccat:b']),
).toEqual(['iptccat:a', 'iptccat:b']);
).toEqual(['apCat:a', 'apCat:b']);
});

it('should expand category codes with multiple subcodes', () => {
expect(processFingerpostAPCategoryCodes(['iptccat:c+d'])).toEqual([
'iptccat:c',
'iptccat:d',
'apCat:c',
'apCat:d',
]);
});

Expand All @@ -31,7 +31,7 @@ describe('processFingerpostAPCategoryCodes', () => {
it('should remove empty strings', () => {
expect(
processFingerpostAPCategoryCodes(['iptccat:a', '', 'iptccat:c']),
).toEqual(['iptccat:a', 'iptccat:c']);
).toEqual(['apCat:a', 'apCat:c']);
});

it('should remove trailing and leading whitespace', () => {
Expand All @@ -42,7 +42,7 @@ describe('processFingerpostAPCategoryCodes', () => {
' service:news ',
'qCode:value ',
]),
).toEqual(['iptccat:a', 'iptccat:c', 'qCode:value']);
).toEqual(['apCat:a', 'apCat:c', 'qCode:value']);
});

it('should deduplicate category codes after stripping whitespace', () => {
Expand All @@ -52,6 +52,6 @@ describe('processFingerpostAPCategoryCodes', () => {
' iptccat:a',
'iptccat:c',
]),
).toEqual(['iptccat:a', 'iptccat:c']);
).toEqual(['apCat:a', 'apCat:c']);
});
});
15 changes: 10 additions & 5 deletions ingestion-lambda/src/categoryCodes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,26 @@ function partition<T>(
return [first, second];
}

/**
* We receive AP codes from Fingerpost in the format `prefix:code1+code2+code3:code4+code5`.
* At the time of writing, these are AP category codes that are mislabelled as `iptccat` codes.
* This function rewrites the `iptccat` prefix to `apCat` and splits the codes into individual
* category codes, e.g. `iptccat:c+d` becomes `apCat:c` and `apCat:d`.
*/
function flattenCategoryCodes(categoryCodes: string): string[] {
const [prefix, ...codes] = categoryCodes.split(':');
return codes
.flatMap((_) => _.split('+'))
.flatMap((code) => `${prefix}:${code}`);
.map((code) => `${prefix?.trim() === 'iptccat' ? 'apCat' : prefix}:${code}`);
}

export function processFingerpostAPCategoryCodes(original: string[]): string[] {
const remainingNotServiceCodes = original.filter((_) => !_.includes('service:'));
const [iptccatCodes, rest] = partition(remainingNotServiceCodes, (code) =>
const notServiceCodes = original.filter((_) => !_.includes('service:')); // we aren't interested in keeping the service codes here
const [categoryCodes, rest] = partition(notServiceCodes, (code) =>
code.includes('iptccat:'),
);
const transformedIptccatCodes = iptccatCodes.flatMap(flattenCategoryCodes);
const transformedCategoryCodes = categoryCodes.flatMap(flattenCategoryCodes);

const allCategoryCodes = [...transformedIptccatCodes, ...rest]
const allCategoryCodes = [...transformedCategoryCodes, ...rest]
.map((_) => _.trim())
.filter((_) => _.length > 0);
const deduped = [...new Set(allCategoryCodes)];
Expand Down

0 comments on commit f202256

Please sign in to comment.