Skip to content

Commit

Permalink
Merge pull request #162 from guardian/aap-world
Browse files Browse the repository at this point in the history
Add AAP World bucket
  • Loading branch information
sb-dev authored Feb 21, 2025
2 parents dcb6e59 + 78176b8 commit 6847b26
Show file tree
Hide file tree
Showing 5 changed files with 667 additions and 8 deletions.
37 changes: 36 additions & 1 deletion ingestion-lambda/src/categoryCodes.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { processFingerpostAPCategoryCodes } from './categoryCodes';
import {processFingerpostAAPCategoryCodes, processFingerpostAPCategoryCodes} from './categoryCodes';

describe('processFingerpostAPCategoryCodes', () => {
it('should return an empty array if provided with an empty array', () => {
Expand Down Expand Up @@ -55,3 +55,38 @@ describe('processFingerpostAPCategoryCodes', () => {
).toEqual(['apCat:a', 'apCat:c']);
});
});

// Test suite for the AAP category-code processor: empty input, rejection of
// entries that are not codes, prefixing of "code+label" entries with "subj:",
// and flattening of several codes joined with '|' inside one entry.
describe('processFingerpostAAPCategoryCodes', () => {
  it('should return an empty array if provided with an empty array', () => {
    const result = processFingerpostAAPCategoryCodes([]);

    expect(result).toEqual([]);
  });

  it('should strip out non-valid news codes', () => {
    // 'and' has neither a ':' nor a '+', so it is not treated as a code.
    const result = processFingerpostAAPCategoryCodes(['and']);

    expect(result).toEqual([]);
  });

  it('should process valid news codes', () => {
    const input = [
      '04007003+food',
      'goods|04013002+food',
      'and',
      'medtop:20000049'
    ];
    // Entries containing ':' come first, followed by the "subj:" codes.
    const expected = ['medtop:20000049', 'subj:04007003', 'subj:04013002'];

    const result = processFingerpostAAPCategoryCodes(input);

    expect(result).toEqual(expected);
  });

  it('should flatten embedded news codes', () => {
    const input = [
      '11001000+defence|11001005+military',
      'equipment|11002000+diplomacy|04015001+air',
    ];
    const expected = [
      'subj:11001000',
      'subj:11001005',
      'subj:11002000',
      'subj:04015001',
    ];

    const result = processFingerpostAAPCategoryCodes(input);

    expect(result).toEqual(expected);
  });
});
17 changes: 17 additions & 0 deletions ingestion-lambda/src/categoryCodes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,20 @@ export function processFingerpostAPCategoryCodes(original: string[]): string[] {
const deduped = [...new Set(allCategoryCodes)];
return deduped;
}

/**
 * Normalises the raw category codes supplied for AAP items.
 *
 * Each input entry may hold several values joined with `|`; these are
 * flattened first. After flattening:
 *  - values containing `:` (e.g. `medtop:20000049`) are kept verbatim;
 *  - values containing `+` are treated as `code+label`, and the part before
 *    the first `+` is emitted as `subj:<code>`;
 *  - anything else (e.g. stray label words such as `and`) is dropped.
 *
 * The result lists the `:` values first, then the `subj:` values, each in
 * first-seen order. Duplicates are removed, matching the behaviour of
 * `processFingerpostAPCategoryCodes`. Values with an empty code part
 * (e.g. `+food`) are skipped rather than emitted as a bare `subj:`.
 *
 * @param categoryCodes raw category code strings as received from the feed
 * @returns the de-duplicated, prefixed category codes
 */
export function processFingerpostAAPCategoryCodes(categoryCodes: string[]): string[] {
  // A single entry can carry several codes separated by '|'.
  const allCategoryCodes = categoryCodes.flatMap((categoryCode) =>
    categoryCode.split('|'),
  );

  // Values that already carry a "scheme:" prefix are passed through untouched.
  const mediaTopics = allCategoryCodes.filter((categoryCode) =>
    categoryCode.includes(':'),
  );

  const legacySubjectCodes = allCategoryCodes
    .filter((categoryCode) => categoryCode.includes('+'))
    // Keep only the code that precedes the first '+'; the label is discarded.
    .map((categoryCode) => categoryCode.slice(0, categoryCode.indexOf('+')))
    // Guard against malformed values such as '+food' producing a bare 'subj:'.
    .filter((code) => code.length > 0)
    .map((code) => `subj:${code}`);

  // A Set preserves insertion order, so output ordering is unchanged.
  return [...new Set([...mediaTopics, ...legacySubjectCodes])];
}
24 changes: 17 additions & 7 deletions ingestion-lambda/src/handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { createLogger } from '../../shared/lambda-logging';
import { createDbConnection } from '../../shared/rds';
import type { IngestorInputBody } from '../../shared/types';
import { IngestorInputBodySchema } from '../../shared/types';
import { processFingerpostAPCategoryCodes } from './categoryCodes';
import {processFingerpostAAPCategoryCodes, processFingerpostAPCategoryCodes} from './categoryCodes';
import { tableName } from './database';
import { BUCKET_NAME, s3Client } from './s3';
import { lookupSupplier } from './suppliers';
Expand Down Expand Up @@ -52,6 +52,21 @@ export const processKeywords = (
return cleanAndDedupeKeywords(keywords.split('+'));
};

/**
 * Routes raw subject codes to the processor for the given supplier.
 *
 * 'AP' and 'AAP' each have a dedicated processor; any other supplier
 * (including `undefined`) yields an empty list.
 */
const processCategoryCodes = (supplier: string | undefined, subjectCodes: string[]) => {
  if (supplier === 'AP') {
    return processFingerpostAPCategoryCodes(subjectCodes);
  }

  if (supplier === 'AAP') {
    return processFingerpostAAPCategoryCodes(subjectCodes);
  }

  // No category-code handling for any other supplier.
  return [];
}

const safeBodyParse = (body: string): IngestorInputBody => {
try {
const json = JSON.parse(body) as Record<string, unknown>;
Expand Down Expand Up @@ -139,12 +154,7 @@ export const main = async (event: SQSEvent): Promise<SQSBatchResponse> => {

const supplier = lookupSupplier(snsMessageContent['source-feed']);

const categoryCodes =
supplier === 'AP'
? processFingerpostAPCategoryCodes(
snsMessageContent.subjects?.code ?? [],
)
: [];
const categoryCodes = processCategoryCodes(supplier, snsMessageContent.subjects?.code ?? [])

const result = await sql`
INSERT INTO ${sql(tableName)}
Expand Down
Loading

0 comments on commit 6847b26

Please sign in to comment.