Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support translation of input audio to English #100

Merged
merged 6 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion packages/api/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,35 @@ const getApp = async () => {
body.data.fileName,
signedUrl,
body.data.languageCode,
false,
);
if (isSqsFailure(sendResult)) {
res.status(500).send(sendResult.errorMsg);
return;
}
// When the client asked for a translation, queue a second message for the same
// upload with the final `translate` argument set to true. It is sent with the
// same signedUrl, fileName and languageCode as the call above.
if (body.data.translationRequested) {
const translationSendResult =
await generateOutputSignedUrlAndSendMessage(
s3Key,
sqsClient,
config.app.taskQueueUrl,
config.app.transcriptionOutputBucket,
config.aws.region,
userEmail,
body.data.fileName,
signedUrl,
body.data.languageCode,
true,
);
if (isSqsFailure(translationSendResult)) {
// NOTE: reaching this point means the first send above did not fail, so a 500
// here reports a failure of the translation request only.
res
.status(500)
.send(
`Translation request failed: ${translationSendResult.errorMsg}`,
);
return;
}
}
logger.info('API successfully sent the message to SQS', {
id: s3Key,
filename: body.data.fileName,
Expand Down Expand Up @@ -209,7 +233,7 @@ const getApp = async () => {
}
const exportResult = await createTranscriptDocument(
config,
`${parsedItem.data.originalFilename} transcript`,
`${parsedItem.data.originalFilename} transcript${parsedItem.data.isTranslation ? ' (English translation)' : ''}`,
exportRequest.data.oAuthTokenResponse,
transcriptText.text,
);
Expand Down
1 change: 1 addition & 0 deletions packages/backend-common/src/dynamodb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export const TranscriptionDynamoItem = z.object({
transcriptKeys: TranscriptKeys,
userEmail: z.string(),
completedAt: z.optional(z.string()), // dynamodb can't handle dates so we need to use an ISO date
// Newly added field: items stored before it existed will not carry it, so fall
// back to false (a plain transcript) instead of failing validation.
isTranslation: z.boolean().default(false),
});

export type TranscriptionDynamoItem = z.infer<typeof TranscriptionDynamoItem>;
Expand Down
22 changes: 13 additions & 9 deletions packages/backend-common/src/sqs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ export const isSqsFailure = (
): result is SQSFailure => result.status === AWSStatus.Failure;

export const generateOutputSignedUrlAndSendMessage = async (
id: string,
s3Key: string,
client: SQSClient,
queueUrl: string,
outputBucket: string,
Expand All @@ -66,27 +66,30 @@ export const generateOutputSignedUrlAndSendMessage = async (
originalFilename: string,
inputSignedUrl: string,
languageCode: LanguageCode,
translate: boolean,
): Promise<SendResult> => {
const signedUrls = await generateOutputSignedUrls(
id,
s3Key,
region,
outputBucket,
userEmail,
originalFilename,
7,
translate,
);

const jobId = translate ? `${s3Key}-translation` : s3Key;
const job: TranscriptionJob = {
id, // id of the source file
id: jobId, // s3Key of the source file, suffixed with '-translation' for translation jobs
inputSignedUrl,
sentTimestamp: new Date().toISOString(),
userEmail,
transcriptDestinationService: DestinationService.TranscriptionService,
originalFilename,
outputBucketUrls: signedUrls,
languageCode,
translate,
};
return await sendMessage(client, queueUrl, JSON.stringify(job), id);
return await sendMessage(client, queueUrl, JSON.stringify(job), s3Key);
};

const sendMessage = async (
Expand Down Expand Up @@ -278,13 +281,14 @@ const generateOutputSignedUrls = async (
region: string,
outputBucket: string,
userEmail: string,
originalFilename: string,
expiresInDays: number,
translate: boolean,
): Promise<OutputBucketUrls> => {
const fileName = `${id}${translate ? '-translation' : ''}`;
const expiresIn = expiresInDays * 24 * 60 * 60;
const srtKey = `srt/${id}.srt`;
const jsonKey = `json/${id}.json`;
const textKey = `text/${id}.txt`;
const srtKey = `srt/${fileName}.srt`;
const jsonKey = `json/${fileName}.json`;
const textKey = `text/${fileName}.txt`;
const srtSignedS3Url = await getSignedUploadUrl(
region,
outputBucket,
Expand Down
53 changes: 48 additions & 5 deletions packages/client/src/components/UploadForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ import {
TranscribeFileRequestBody,
} from '@guardian/transcription-service-common';
import { AuthContext } from '@/app/template';
import { FileInput, Label, Select } from 'flowbite-react';
import { Checkbox, FileInput, Label, Select } from 'flowbite-react';
import { RequestStatus } from '@/types';
import { iconForStatus, InfoMessage } from '@/components/InfoMessage';

const uploadFileAndTranscribe = async (
file: File,
token: string,
languageCode: LanguageCode,
translationRequested: boolean,
) => {
const blob = new Blob([file as BlobPart]);

Expand All @@ -42,6 +43,7 @@ const uploadFileAndTranscribe = async (
s3Key: body.data.s3Key,
fileName: file.name,
languageCode,
translationRequested,
};

const sendMessageResponse = await authFetch('/api/transcribe-file', token, {
Expand Down Expand Up @@ -83,6 +85,8 @@ export const UploadForm = () => {
const [languageCodeValid, setLanguageCodeValid] = useState<
boolean | undefined
>(undefined);
const [translationRequested, setTranslationRequested] =
useState<boolean>(false);
const { token } = useContext(AuthContext);

const reset = () => {
Expand Down Expand Up @@ -143,10 +147,24 @@ export const UploadForm = () => {
role="alert"
>
<span className="font-medium">Upload complete. </span>{' '}
Transcription in progress - check your email for the completed
transcript. The service can take a few minutes to start up, but
thereafter the transcription process is typically shorter than the
length of the media file.{' '}
<p>
  Transcription in progress - check your email for the completed
  transcript.{' '}
</p>
<div className="font-medium">
  <p>
    {' '}
    The service can take a few minutes to start up, but thereafter
    the transcription process is typically shorter than the length
    of the media file.{' '}
  </p>
  <p>
    If you have requested a translation, you will receive 2 emails
    - one for the transcription in the original language, another
    for the English translation. The emails will arrive at
    different times.
  </p>
</div>
<button
onClick={() => reset()}
className="font-medium text-blue-600 underline dark:text-blue-500 hover:no-underline"
Expand Down Expand Up @@ -191,6 +209,7 @@ export const UploadForm = () => {
file,
token,
mediaFileLanguageCode,
// The translation checkbox is only rendered for non-English media, but hiding
// it does not clear its state - so gate the flag on the selected language too.
translationRequested && mediaFileLanguageCode !== 'en',
);
if (!result) {
setUploads((prev) =>
Expand Down Expand Up @@ -261,6 +280,30 @@ export const UploadForm = () => {
))}
</Select>
</div>
{/* The English-translation option is only offered when the selected media language is not English */}
{mediaFileLanguageCode !== 'en' && (
  <div className="mb-6">
    <div className="flex gap-2">
      <div className="flex h-5 items-center">
        <Checkbox
          id="translation"
          checked={translationRequested}
          onChange={() =>
            setTranslationRequested(!translationRequested)
          }
        />
      </div>
      <div className="flex flex-col">
        {/* htmlFor must match the checkbox id so that clicking the label toggles it */}
        <Label htmlFor="translation">Request English translation</Label>
        <div className="text-gray-500 dark:text-gray-300">
          <span className="text-xs font-normal">
            You will receive two documents - a transcript in the
            original language and a translation in English.
          </span>
        </div>
      </div>
    </div>
  </div>
)}
<button
type="submit"
className="text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm w-full sm:w-auto px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800"
Expand Down
3 changes: 3 additions & 0 deletions packages/common/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export const TranscriptionJob = z.object({
transcriptDestinationService: z.nativeEnum(DestinationService),
outputBucketUrls: OutputBucketUrls,
languageCode: zodLanguageCode,
translate: z.boolean(),
});

export type TranscriptionJob = z.infer<typeof TranscriptionJob>;
Expand All @@ -53,6 +54,7 @@ const TranscriptionOutputBase = z.object({
id: z.string(),
originalFilename: z.string(),
userEmail: z.string(),
isTranslation: z.boolean(),
});

export const TranscriptionOutputSuccess = TranscriptionOutputBase.extend({
Expand Down Expand Up @@ -140,6 +142,7 @@ export const transcribeFileRequestBody = z.object({
s3Key: z.string(),
fileName: z.string(),
languageCode: zodLanguageCode,
translationRequested: z.boolean(),
});
export type TranscribeFileRequestBody = z.infer<
typeof transcribeFileRequestBody
Expand Down
18 changes: 13 additions & 5 deletions packages/output-handler/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,23 @@ const successMessageBody = (
transcriptId: string,
originalFilename: string,
rootUrl: string,
isTranslation: boolean,
): string => {
const exportUrl = `${rootUrl}/export?transcriptId=${transcriptId}`;
return `
<h1>Transcript for ${originalFilename} ready</h1>
<h1>${isTranslation ? 'English translation ' : 'Transcription'} for ${originalFilename} ready</h1>
<p>Click <a href="${exportUrl}">here</a> to export to a google doc.</p>
<p><b>Note:</b> transcripts will expire after 7 days. Export your transcript to a doc now if you want to keep it. </p>
`;
};

const failureMessageBody = (originalFilename: string, id: string): string => {
const failureMessageBody = (
originalFilename: string,
id: string,
isTranslation: boolean,
): string => {
return `
<h1>Transcription for ${originalFilename} has failed.</h1>
<h1>${isTranslation ? 'English translation' : 'Transcription'} for ${originalFilename} has failed.</h1>
<p>Please make sure that the file is a valid audio or video file.</p>
<p>Contact the digital investigations team for support.</p>
<p>Transcription ID: ${id}</p>
Expand All @@ -61,6 +66,7 @@ const handleTranscriptionSuccess = async (
},
userEmail: transcriptionOutput.userEmail,
completedAt: new Date().toISOString(),
isTranslation: transcriptionOutput.isTranslation,
};

try {
Expand All @@ -74,11 +80,12 @@ const handleTranscriptionSuccess = async (
sesClient,
config.app.emailNotificationFromAddress,
transcriptionOutput.userEmail,
`Transcription complete for ${transcriptionOutput.originalFilename}`,
`${transcriptionOutput.isTranslation ? 'English translation' : 'Transcription'} complete for ${transcriptionOutput.originalFilename}`,
successMessageBody(
transcriptionOutput.id,
transcriptionOutput.originalFilename,
config.app.rootUrl,
transcriptionOutput.isTranslation,
),
);

Expand Down Expand Up @@ -107,10 +114,11 @@ const handleTranscriptionFailure = async (
sesClient,
config.app.emailNotificationFromAddress,
transcriptionOutput.userEmail,
`Transcription failed for ${transcriptionOutput.originalFilename}`,
`${transcriptionOutput.isTranslation ? 'English translation' : 'Transcription'} failed for ${transcriptionOutput.originalFilename}`,
failureMessageBody(
transcriptionOutput.originalFilename,
transcriptionOutput.id,
transcriptionOutput.isTranslation,
),
);

Expand Down
3 changes: 3 additions & 0 deletions packages/worker/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ const publishTranscriptionOutputFailure = async (
status: 'FAILURE',
userEmail: job.userEmail,
originalFilename: job.originalFilename,
isTranslation: job.translate,
};
try {
await publishTranscriptionOutput(sqsClient, destination, failureMessage);
Expand Down Expand Up @@ -251,6 +252,7 @@ const pollTranscriptionQueue = async (
numberOfThreads,
config.app.stage === 'PROD' ? 'medium' : 'tiny',
job.languageCode,
job.translate,
);

// if we've received an interrupt signal we don't want to perform a half-finished transcript upload/publish as
Expand Down Expand Up @@ -286,6 +288,7 @@ const pollTranscriptionQueue = async (
userEmail: job.userEmail,
originalFilename: job.originalFilename,
outputBucketKeys,
isTranslation: job.translate,
};

await publishTranscriptionOutput(
Expand Down
11 changes: 9 additions & 2 deletions packages/worker/src/transcribe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ export const getTranscriptionText = async (
numberOfThreads: number,
model: WhisperModel,
languageCode: LanguageCode,
translate: boolean,
): Promise<TranscriptionResult> => {
try {
const { fileName, metadata } = await transcribe(
Expand All @@ -176,6 +177,7 @@ export const getTranscriptionText = async (
numberOfThreads,
model,
languageCode,
translate,
);

const srtPath = path.resolve(path.parse(file).dir, `${fileName}.srt`);
Expand All @@ -190,7 +192,7 @@ export const getTranscriptionText = async (

return { transcripts, metadata };
} catch (error) {
logger.error(`Could not read the transcripts result`);
logger.error(`Could not read the transcript result`);
throw error;
}
};
Expand Down Expand Up @@ -223,6 +225,7 @@ export const transcribe = async (
numberOfThreads: number,
model: WhisperModel,
languageCode: LanguageCode,
translate: boolean,
) => {
const fileName = path.parse(file).name;
const containerOutputFilePath = path.resolve(CONTAINER_FOLDER, fileName);
Expand All @@ -246,10 +249,14 @@ export const transcribe = async (
containerOutputFilePath,
'--language',
languageCode,
// Only append the flag when translating. NOTE(review): assuming this array is
// handed to the child process as its argument list, an '' element would be
// passed through as a real (empty) argument - confirm against the runner.
...(translate ? ['--translate'] : []),
]);
const metadata = extractWhisperStderrData(result.stderr);
logger.info('Transcription finished successfully', metadata);
return { fileName, metadata };
return {
fileName: `${fileName}`,
metadata,
};
} catch (error) {
logger.error(`Transcription failed due to `, error);
throw error;
Expand Down
Loading