Skip to content

Commit

Permalink
Merge pull request #138 from guardian/handle-media-download-failures
Browse files Browse the repository at this point in the history
Handle media download failures
  • Loading branch information
philmcmahon authored Feb 12, 2025
2 parents a1e223d + d4c7b83 commit c28f93b
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 12 deletions.
1 change: 1 addition & 0 deletions packages/cdk/lib/transcription-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,7 @@ export class TranscriptionService extends GuStack {
resources: [
transcriptionTaskQueue.queueArn,
transcriptionGpuTaskQueue.queueArn,
transcriptionOutputQueue.queueArn,
],
}),
new PolicyStatement({
Expand Down
14 changes: 10 additions & 4 deletions packages/common/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,13 @@ export const TranscriptionJob = z.object({

export type TranscriptionJob = z.infer<typeof TranscriptionJob>;

const TranscriptionOutputBase = z.object({
const OutputBase = z.object({
id: z.string(),
originalFilename: z.string(),
userEmail: z.string(),
});

const TranscriptionOutputBase = OutputBase.extend({
originalFilename: z.string(),
isTranslation: z.boolean(),
});

Expand All @@ -88,8 +91,7 @@ export const TranscriptionOutputSuccess = TranscriptionOutputBase.extend({
translationOutputBucketKeys: z.optional(OutputBucketKeys),
});

export const MediaDownloadFailure = z.object({
id: z.string(),
export const MediaDownloadFailure = OutputBase.extend({
status: z.literal('MEDIA_DOWNLOAD_FAILURE'),
url: z.string(),
});
Expand Down Expand Up @@ -123,6 +125,10 @@ export const transcriptionOutputIsTranscriptionFailure = (
): output is TranscriptionOutputFailure =>
output.status === 'TRANSCRIPTION_FAILURE';

/**
 * Type guard that narrows a {@link TranscriptionOutput} to the
 * media-download-failure variant by inspecting its `status` discriminant.
 *
 * @param output - Any parsed transcription output message.
 * @returns `true` when `output.status` is `'MEDIA_DOWNLOAD_FAILURE'`.
 */
export const transcriptionOutputIsMediaDownloadFailure = (
	output: TranscriptionOutput,
): output is MediaDownloadFailure => {
	const { status } = output;
	return status === 'MEDIA_DOWNLOAD_FAILURE';
};

export type TranscriptionOutput = z.infer<typeof TranscriptionOutput>;

export const SignedUrlResponseBody = z.object({
Expand Down
1 change: 1 addition & 0 deletions packages/media-download/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ const reportDownloadFailure = async (
id: job.id,
status: 'MEDIA_DOWNLOAD_FAILURE',
url: job.url,
userEmail: job.userEmail,
};
const result = await sendMessage(
sqsClient,
Expand Down
71 changes: 63 additions & 8 deletions packages/output-handler/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import {
TranscriptionOutputFailure,
transcriptionOutputIsTranscriptionFailure,
TranscriptionDynamoItem,
transcriptionOutputIsMediaDownloadFailure,
MediaDownloadFailure,
} from '@guardian/transcription-service-common';
import {
MetricsService,
Expand All @@ -41,7 +43,7 @@ const successMessageBody = (
`;
};

const failureMessageBody = (
const transcriptionFailureMessageBody = (
originalFilename: string,
id: string,
isTranslation: boolean,
Expand All @@ -51,11 +53,22 @@ const failureMessageBody = (
<h1>${isTranslation ? 'English translation ' : 'Transcription'}for ${originalFilename} has failed.</h1>
<p>Please make sure that the file is a valid audio or video file.</p>
<p>Click <a href="${sourceMediaDownloadUrl}">here</a> to download the input media.</p>
<p>Contact digital.investigations@guardian.co.uk for support.</p>
<p>Contact digital.investigations@theguardian.com for support.</p>
<p>Transcription ID: ${id}</p>
`;
};

/**
 * Builds the HTML body of the email sent when the media at a
 * user-submitted URL could not be downloaded.
 *
 * The URL is supplied by the requester, so it is HTML-escaped before being
 * interpolated into the markup. Without this, a URL containing `<`, `>`, `"`
 * or `&` could inject or corrupt markup in the email.
 *
 * @param url - The URL the user asked to have transcribed.
 * @returns An HTML fragment suitable for use as the email body.
 */
const mediaDownloadFailureMessageBody = (url: string): string => {
	// `&` must be replaced first so the entities produced below are not
	// themselves re-escaped.
	const safeUrl = url
		.replace(/&/g, '&amp;')
		.replace(/</g, '&lt;')
		.replace(/>/g, '&gt;')
		.replace(/"/g, '&quot;')
		.replace(/'/g, '&#39;');

	return `
<h1>Media download failed for ${safeUrl}</h1>
<p>You recently requested a transcription of the media at this url ${safeUrl}. Unfortunately, the transcription service
was unable to download the media for transcription.</p>
<p>This might be because the url is for an unsupported website. For a list of supported sites, see
<a href="https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md">here</a>.</p>
<p>Please contact digital.investigations@theguardian.com for further assistance.</p>
`;
};

const handleTranscriptionSuccess = async (
config: TranscriptionConfig,
transcriptionOutput: TranscriptionOutputSuccess,
Expand Down Expand Up @@ -125,19 +138,51 @@ const handleTranscriptionFailure = async (
config.app.emailNotificationFromAddress,
transcriptionOutput.userEmail,
`${transcriptionOutput.isTranslation ? 'English translation ' : 'Transcription'} failed for ${transcriptionOutput.originalFilename}`,
failureMessageBody(
transcriptionFailureMessageBody(
transcriptionOutput.originalFilename,
transcriptionOutput.id,
transcriptionOutput.isTranslation,
sourceMediaDownloadUrl,
),
);

logger.info('Output handler successfully sent failure email notification', {
id: transcriptionOutput.id,
filename: transcriptionOutput.originalFilename,
userEmail: transcriptionOutput.userEmail,
});
logger.info(
'Output handler successfully sent transcription failure email notification',
{
id: transcriptionOutput.id,
filename: transcriptionOutput.originalFilename,
userEmail: transcriptionOutput.userEmail,
},
);
} catch (error) {
logger.error('Failed to process sqs failure message', error);
await metrics.putMetric(FailureMetric);
}
};

const handleMediaDownloadFailure = async (
config: TranscriptionConfig,
failure: MediaDownloadFailure,
sesClient: SESClient,
metrics: MetricsService,
) => {
try {
await sendEmail(
sesClient,
config.app.emailNotificationFromAddress,
failure.userEmail,
`Media download failed for ${failure.url}`,
mediaDownloadFailureMessageBody(failure.url),
);

logger.info(
'Output handler successfully sent media download failure email notification',
{
id: failure.id,
url: failure.url,
userEmail: failure.userEmail,
},
);
} catch (error) {
logger.error('Failed to process sqs failure message', error);
await metrics.putMetric(FailureMetric);
Expand Down Expand Up @@ -199,6 +244,16 @@ const processMessage = async (event: unknown) => {
metrics,
sourceMediaDownloadUrl,
);
} else if (transcriptionOutputIsMediaDownloadFailure(transcriptionOutput)) {
logger.info(
`Handling media download failure. Output: ${JSON.stringify(transcriptionOutput)}`,
);
await handleMediaDownloadFailure(
config,
transcriptionOutput,
sesClient,
metrics,
);
}
}
};
Expand Down

0 comments on commit c28f93b

Please sign in to comment.