Skip to content

Commit

Permalink
Merge pull request #100 from guardian/support-translation
Browse files Browse the repository at this point in the history
Support translation of input audio to English
  • Loading branch information
philmcmahon authored Sep 3, 2024
2 parents 373be0f + 7257460 commit e37afee
Show file tree
Hide file tree
Showing 8 changed files with 115 additions and 22 deletions.
26 changes: 25 additions & 1 deletion packages/api/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,35 @@ const getApp = async () => {
body.data.fileName,
signedUrl,
body.data.languageCode,
false,
);
if (isSqsFailure(sendResult)) {
res.status(500).send(sendResult.errorMsg);
return;
}
if (body.data.translationRequested) {
const translationSendResult =
await generateOutputSignedUrlAndSendMessage(
s3Key,
sqsClient,
config.app.taskQueueUrl,
config.app.transcriptionOutputBucket,
config.aws.region,
userEmail,
body.data.fileName,
signedUrl,
body.data.languageCode,
true,
);
if (isSqsFailure(translationSendResult)) {
res
.status(500)
.send(
`Translation request failed: ${translationSendResult.errorMsg}`,
);
return;
}
}
logger.info('API successfully sent the message to SQS', {
id: s3Key,
filename: body.data.fileName,
Expand Down Expand Up @@ -209,7 +233,7 @@ const getApp = async () => {
}
const exportResult = await createTranscriptDocument(
config,
`${parsedItem.data.originalFilename} transcript`,
`${parsedItem.data.originalFilename} transcript${parsedItem.data.isTranslation ? ' (English translation)' : ''}`,
exportRequest.data.oAuthTokenResponse,
transcriptText.text,
);
Expand Down
1 change: 1 addition & 0 deletions packages/backend-common/src/dynamodb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export const TranscriptionDynamoItem = z.object({
transcriptKeys: TranscriptKeys,
userEmail: z.string(),
completedAt: z.optional(z.string()), // dynamodb can't handle dates so we need to use an ISO date
isTranslation: z.boolean(),
});

export type TranscriptionDynamoItem = z.infer<typeof TranscriptionDynamoItem>;
Expand Down
22 changes: 13 additions & 9 deletions packages/backend-common/src/sqs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ export const isSqsFailure = (
): result is SQSFailure => result.status === AWSStatus.Failure;

export const generateOutputSignedUrlAndSendMessage = async (
id: string,
s3Key: string,
client: SQSClient,
queueUrl: string,
outputBucket: string,
Expand All @@ -66,27 +66,30 @@ export const generateOutputSignedUrlAndSendMessage = async (
originalFilename: string,
inputSignedUrl: string,
languageCode: LanguageCode,
translate: boolean,
): Promise<SendResult> => {
const signedUrls = await generateOutputSignedUrls(
id,
s3Key,
region,
outputBucket,
userEmail,
originalFilename,
7,
translate,
);

const jobId = translate ? `${s3Key}-translation` : s3Key;
const job: TranscriptionJob = {
id, // id of the source file
id: jobId, // s3Key of the source file, suffixed with "-translation" for translation jobs
inputSignedUrl,
sentTimestamp: new Date().toISOString(),
userEmail,
transcriptDestinationService: DestinationService.TranscriptionService,
originalFilename,
outputBucketUrls: signedUrls,
languageCode,
translate,
};
return await sendMessage(client, queueUrl, JSON.stringify(job), id);
return await sendMessage(client, queueUrl, JSON.stringify(job), s3Key);
};

const sendMessage = async (
Expand Down Expand Up @@ -278,13 +281,14 @@ const generateOutputSignedUrls = async (
region: string,
outputBucket: string,
userEmail: string,
originalFilename: string,
expiresInDays: number,
translate: boolean,
): Promise<OutputBucketUrls> => {
const fileName = `${id}${translate ? '-translation' : ''}`;
const expiresIn = expiresInDays * 24 * 60 * 60;
const srtKey = `srt/${id}.srt`;
const jsonKey = `json/${id}.json`;
const textKey = `text/${id}.txt`;
const srtKey = `srt/${fileName}.srt`;
const jsonKey = `json/${fileName}.json`;
const textKey = `text/${fileName}.txt`;
const srtSignedS3Url = await getSignedUploadUrl(
region,
outputBucket,
Expand Down
53 changes: 48 additions & 5 deletions packages/client/src/components/UploadForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ import {
TranscribeFileRequestBody,
} from '@guardian/transcription-service-common';
import { AuthContext } from '@/app/template';
import { FileInput, Label, Select } from 'flowbite-react';
import { Checkbox, FileInput, Label, Select } from 'flowbite-react';
import { RequestStatus } from '@/types';
import { iconForStatus, InfoMessage } from '@/components/InfoMessage';

const uploadFileAndTranscribe = async (
file: File,
token: string,
languageCode: LanguageCode,
translationRequested: boolean,
) => {
const blob = new Blob([file as BlobPart]);

Expand All @@ -42,6 +43,7 @@ const uploadFileAndTranscribe = async (
s3Key: body.data.s3Key,
fileName: file.name,
languageCode,
translationRequested,
};

const sendMessageResponse = await authFetch('/api/transcribe-file', token, {
Expand Down Expand Up @@ -83,6 +85,8 @@ export const UploadForm = () => {
const [languageCodeValid, setLanguageCodeValid] = useState<
boolean | undefined
>(undefined);
const [translationRequested, setTranslationRequested] =
useState<boolean>(false);
const { token } = useContext(AuthContext);

const reset = () => {
Expand Down Expand Up @@ -143,10 +147,24 @@ export const UploadForm = () => {
role="alert"
>
<span className="font-medium">Upload complete. </span>{' '}
Transcription in progress - check your email for the completed
transcript. The service can take a few minutes to start up, but
thereafter the transcription process is typically shorter than the
length of the media file.{' '}
<p>
Transcription in progress - check your email for the completed
transcript.{' '}
</p>
<div className="font-medium">
<p>
{' '}
The service can take a few minutes to start up, but thereafter
the transcription process is typically shorter than the length
of the media file.{' '}
</p>
<p>
If you have requested a translation, you will receive 2 emails
- one for the transcription in the original language, another
for the english translation. The emails will arrive at
different times
</p>
</div>
<button
onClick={() => reset()}
className="font-medium text-blue-600 underline dark:text-blue-500 hover:no-underline"
Expand Down Expand Up @@ -191,6 +209,7 @@ export const UploadForm = () => {
file,
token,
mediaFileLanguageCode,
translationRequested,
);
if (!result) {
setUploads((prev) =>
Expand Down Expand Up @@ -261,6 +280,30 @@ export const UploadForm = () => {
))}
</Select>
</div>
{mediaFileLanguageCode !== 'en' && (
<div className="mb-6">
<div className="flex gap-2">
<div className="flex h-5 items-center">
<Checkbox
id="translation"
checked={translationRequested}
onChange={() =>
setTranslationRequested(!translationRequested)
}
/>
</div>
<div className="flex flex-col">
<Label htmlFor="shipping">Request English translation</Label>
<div className="text-gray-500 dark:text-gray-300">
<span className="text-xs font-normal">
You will receive two documents - a transcript in the
original language and a translation in English.
</span>
</div>
</div>
</div>
</div>
)}
<button
type="submit"
className="text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm w-full sm:w-auto px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800"
Expand Down
3 changes: 3 additions & 0 deletions packages/common/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export const TranscriptionJob = z.object({
transcriptDestinationService: z.nativeEnum(DestinationService),
outputBucketUrls: OutputBucketUrls,
languageCode: zodLanguageCode,
translate: z.boolean(),
});

export type TranscriptionJob = z.infer<typeof TranscriptionJob>;
Expand All @@ -53,6 +54,7 @@ const TranscriptionOutputBase = z.object({
id: z.string(),
originalFilename: z.string(),
userEmail: z.string(),
isTranslation: z.boolean(),
});

export const TranscriptionOutputSuccess = TranscriptionOutputBase.extend({
Expand Down Expand Up @@ -140,6 +142,7 @@ export const transcribeFileRequestBody = z.object({
s3Key: z.string(),
fileName: z.string(),
languageCode: zodLanguageCode,
translationRequested: z.boolean(),
});
export type TranscribeFileRequestBody = z.infer<
typeof transcribeFileRequestBody
Expand Down
18 changes: 13 additions & 5 deletions packages/output-handler/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,23 @@ const successMessageBody = (
transcriptId: string,
originalFilename: string,
rootUrl: string,
isTranslation: boolean,
): string => {
const exportUrl = `${rootUrl}/export?transcriptId=${transcriptId}`;
return `
<h1>Transcript for ${originalFilename} ready</h1>
<h1>${isTranslation ? 'English translation ' : 'Transcription'} for ${originalFilename} ready</h1>
<p>Click <a href="${exportUrl}">here</a> to export to a google doc.</p>
<p><b>Note:</b> transcripts will expire after 7 days. Export your transcript to a doc now if you want to keep it. </p>
`;
};

const failureMessageBody = (originalFilename: string, id: string): string => {
const failureMessageBody = (
originalFilename: string,
id: string,
isTranslation: boolean,
): string => {
return `
<h1>Transcription for ${originalFilename} has failed.</h1>
<h1>${isTranslation ? 'English translation ' : 'Transcription'}for ${originalFilename} has failed.</h1>
<p>Please make sure that the file is a valid audio or video file.</p>
<p>Contact the digital investigations team for support.</p>
<p>Transcription ID: ${id}</p>
Expand All @@ -61,6 +66,7 @@ const handleTranscriptionSuccess = async (
},
userEmail: transcriptionOutput.userEmail,
completedAt: new Date().toISOString(),
isTranslation: transcriptionOutput.isTranslation,
};

try {
Expand All @@ -74,11 +80,12 @@ const handleTranscriptionSuccess = async (
sesClient,
config.app.emailNotificationFromAddress,
transcriptionOutput.userEmail,
`Transcription complete for ${transcriptionOutput.originalFilename}`,
`${transcriptionOutput.isTranslation ? 'English translation' : 'Transcription'} complete for ${transcriptionOutput.originalFilename}`,
successMessageBody(
transcriptionOutput.id,
transcriptionOutput.originalFilename,
config.app.rootUrl,
transcriptionOutput.isTranslation,
),
);

Expand Down Expand Up @@ -107,10 +114,11 @@ const handleTranscriptionFailure = async (
sesClient,
config.app.emailNotificationFromAddress,
transcriptionOutput.userEmail,
`Transcription failed for ${transcriptionOutput.originalFilename}`,
`${transcriptionOutput.isTranslation ? 'English translation ' : 'Transcription'} failed for ${transcriptionOutput.originalFilename}`,
failureMessageBody(
transcriptionOutput.originalFilename,
transcriptionOutput.id,
transcriptionOutput.isTranslation,
),
);

Expand Down
3 changes: 3 additions & 0 deletions packages/worker/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ const publishTranscriptionOutputFailure = async (
status: 'FAILURE',
userEmail: job.userEmail,
originalFilename: job.originalFilename,
isTranslation: job.translate,
};
try {
await publishTranscriptionOutput(sqsClient, destination, failureMessage);
Expand Down Expand Up @@ -251,6 +252,7 @@ const pollTranscriptionQueue = async (
numberOfThreads,
config.app.stage === 'PROD' ? 'medium' : 'tiny',
job.languageCode,
job.translate,
);

// if we've received an interrupt signal we don't want to perform a half-finished transcript upload/publish as
Expand Down Expand Up @@ -286,6 +288,7 @@ const pollTranscriptionQueue = async (
userEmail: job.userEmail,
originalFilename: job.originalFilename,
outputBucketKeys,
isTranslation: job.translate,
};

await publishTranscriptionOutput(
Expand Down
11 changes: 9 additions & 2 deletions packages/worker/src/transcribe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ export const getTranscriptionText = async (
numberOfThreads: number,
model: WhisperModel,
languageCode: LanguageCode,
translate: boolean,
): Promise<TranscriptionResult> => {
try {
const { fileName, metadata } = await transcribe(
Expand All @@ -176,6 +177,7 @@ export const getTranscriptionText = async (
numberOfThreads,
model,
languageCode,
translate,
);

const srtPath = path.resolve(path.parse(file).dir, `${fileName}.srt`);
Expand All @@ -190,7 +192,7 @@ export const getTranscriptionText = async (

return { transcripts, metadata };
} catch (error) {
logger.error(`Could not read the transcripts result`);
logger.error(`Could not read the transcript result`);
throw error;
}
};
Expand Down Expand Up @@ -223,6 +225,7 @@ export const transcribe = async (
numberOfThreads: number,
model: WhisperModel,
languageCode: LanguageCode,
translate: boolean,
) => {
const fileName = path.parse(file).name;
const containerOutputFilePath = path.resolve(CONTAINER_FOLDER, fileName);
Expand All @@ -246,10 +249,14 @@ export const transcribe = async (
containerOutputFilePath,
'--language',
languageCode,
`${translate ? '--translate' : ''}`,
]);
const metadata = extractWhisperStderrData(result.stderr);
logger.info('Transcription finished successfully', metadata);
return { fileName, metadata };
return {
fileName: `${fileName}`,
metadata,
};
} catch (error) {
logger.error(`Transcription failed due to `, error);
throw error;
Expand Down

0 comments on commit e37afee

Please sign in to comment.