From d31115441d24012f7a42b62b3d55ba6966d174a3 Mon Sep 17 00:00:00 2001 From: Josh McLaughlin Date: Fri, 4 Jun 2021 08:32:02 -0700 Subject: [PATCH 1/2] FEAT: Add new API endpoint for uploading transcripts --- .../views/tests/test_transcript_settings.py | 192 ++++++++++++++++++ .../contentstore/views/transcript_settings.py | 93 ++++++--- cms/urls.py | 1 + 3 files changed, 256 insertions(+), 30 deletions(-) diff --git a/cms/djangoapps/contentstore/views/tests/test_transcript_settings.py b/cms/djangoapps/contentstore/views/tests/test_transcript_settings.py index ad31d2f934b2..be4b920d497c 100644 --- a/cms/djangoapps/contentstore/views/tests/test_transcript_settings.py +++ b/cms/djangoapps/contentstore/views/tests/test_transcript_settings.py @@ -6,6 +6,7 @@ from unittest.mock import ANY, Mock, patch import ddt +from django.test.client import Client from django.test.testcases import TestCase from django.urls import reverse from edxval import api @@ -14,6 +15,7 @@ from cms.djangoapps.contentstore.utils import reverse_course_url from common.djangoapps.student.roles import CourseStaffRole from openedx.core.djangoapps.profile_images.tests.helpers import make_image_file +from openedx.core.djangoapps.oauth_dispatch.jwt import create_jwt_for_user from ..transcript_settings import TranscriptionProviderErrorType, validate_transcript_credentials @@ -550,3 +552,193 @@ def test_transcript_delete_handler(self, is_staff, is_course_staff): )) self.assertEqual(response.status_code, 200) self.assertFalse(api.get_video_transcript_data(video_id=video_id, language_code=language_code)) + + +@ddt.ddt +class TranscriptUploadApiTest(CourseTestCase): + """ + Tests for transcript upload handler. + """ + def setUp(self): + super().setUp() + jwt_headers = { + 'HTTP_AUTHORIZATION': 'JWT ' + create_jwt_for_user(self.user) + } + self.client = Client(**jwt_headers) + + @property + def view_url(self): + """ + Returns url for this view + """ + return reverse('transcript_upload_api') + + def test_401_without_authentication(self): + """ + Verify that redirection happens in case of an unauthenticated request. + """ + response = self.client.post(self.view_url, content_type='application/json', HTTP_AUTHORIZATION='') + self.assertEqual(response.status_code, 401) + + def test_405_with_not_allowed_request_method(self): + """ + Verify that 405 is returned in case of not-allowed request methods. + Allowed request methods include POST. + """ + response = self.client.get(self.view_url, content_type='application/json') + self.assertEqual(response.status_code, 405) + + @patch('cms.djangoapps.contentstore.views.transcript_settings.create_or_update_video_transcript') + @patch( + 'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages', + Mock(return_value=['en']), + ) + def test_transcript_upload_handler(self, mock_create_or_update_video_transcript): + """ + Tests that transcript upload handler works as expected. + """ + transcript_file_stream = StringIO('0\n00:00:00,010 --> 00:00:00,100\nПривіт, edX вітає вас.\n\n') + # Make request to transcript upload handler + response = self.client.post( + self.view_url, + { + 'edx_video_id': '123', + 'language_code': 'en', + 'new_language_code': 'es', + 'file': transcript_file_stream, + }, + format='multipart' + ) + + self.assertEqual(response.status_code, 201) + mock_create_or_update_video_transcript.assert_called_with( + video_id='123', + language_code='en', + metadata={ + 'language_code': 'es', + 'file_format': 'sjson', + 'provider': 'Custom' + }, + file_data=ANY, + ) + + @ddt.data( + ( + { + 'edx_video_id': '123', + 'language_code': 'en', + 'new_language_code': 'en', + }, + 'A transcript file is required.' + ), + ( + { + 'language_code': 'en', + 'file': '0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n' + }, + 'The following parameters are required: edx_video_id, new_language_code.' + ), + ( + { + 'language_code': 'en', + 'new_language_code': 'en', + 'file': '0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n' + }, + 'The following parameters are required: edx_video_id.' + ), + ( + { + 'file': '0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n' + }, + 'The following parameters are required: edx_video_id, language_code, new_language_code.' + ) + ) + @ddt.unpack + @patch( + 'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages', + Mock(return_value=['en']), + ) + def test_transcript_upload_handler_missing_attrs(self, request_payload, expected_error_message): + """ + Tests the transcript upload handler when the required attributes are missing. + """ + # Make request to transcript upload handler + response = self.client.post(self.view_url, request_payload, format='multipart') + self.assertEqual(response.status_code, 400) + self.assertEqual(json.loads(response.content.decode('utf-8'))['error'], expected_error_message) + + @patch( + 'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages', + Mock(return_value=['en', 'es']) + ) + def test_transcript_upload_handler_existing_transcript(self): + """ + Tests that upload handler do not update transcript's language if a transcript + with the same language already present for an edx_video_id. + """ + # Make request to transcript upload handler + request_payload = { + 'edx_video_id': '1234', + 'language_code': 'en', + 'new_language_code': 'es' + } + response = self.client.post(self.view_url, request_payload, format='multipart') + self.assertEqual(response.status_code, 400) + self.assertEqual( + json.loads(response.content.decode('utf-8'))['error'], + 'A transcript with the "es" language code already exists.' + ) + + @patch( + 'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages', + Mock(return_value=['en']), + ) + def test_transcript_upload_handler_with_image(self): + """ + Tests the transcript upload handler with an image file. + """ + with make_image_file() as image_file: + # Make request to transcript upload handler + response = self.client.post( + self.view_url, + { + 'edx_video_id': '123', + 'language_code': 'en', + 'new_language_code': 'es', + 'file': image_file, + }, + format='multipart' + ) + + self.assertEqual(response.status_code, 400) + self.assertEqual( + json.loads(response.content.decode('utf-8'))['error'], + 'There is a problem with this transcript file. Try to upload a different file.' + ) + + @patch( + 'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages', + Mock(return_value=['en']), + ) + def test_transcript_upload_handler_with_invalid_transcript(self): + """ + Tests the transcript upload handler with an invalid transcript file. + """ + transcript_file_stream = StringIO('An invalid transcript SubRip file content') + # Make request to transcript upload handler + response = self.client.post( + self.view_url, + { + 'edx_video_id': '123', + 'language_code': 'en', + 'new_language_code': 'es', + 'file': transcript_file_stream, + }, + format='multipart' + ) + + self.assertEqual(response.status_code, 400) + self.assertEqual( + json.loads(response.content.decode('utf-8'))['error'], + 'There is a problem with this transcript file. Try to upload a different file.' + ) diff --git a/cms/djangoapps/contentstore/views/transcript_settings.py b/cms/djangoapps/contentstore/views/transcript_settings.py index 3d2243cca03a..430ce12a9ac5 100644 --- a/cms/djangoapps/contentstore/views/transcript_settings.py +++ b/cms/djangoapps/contentstore/views/transcript_settings.py @@ -20,11 +20,14 @@ update_transcript_credentials_state_for_org ) from opaque_keys.edx.keys import CourseKey +from rest_framework.decorators import api_view +from rest_framework.response import Response from common.djangoapps.student.auth import has_studio_write_access from common.djangoapps.util.json_request import JsonResponse, expect_json from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag from openedx.core.djangoapps.video_pipeline.api import update_3rd_party_transcription_service_credentials +from openedx.core.lib.api.view_utils import view_auth_classes from xmodule.video_module.transcripts_utils import Transcript, TranscriptsGenerationException from .videos import TranscriptProvider @@ -33,7 +36,8 @@ 'transcript_credentials_handler', 'transcript_download_handler', 'transcript_upload_handler', - 'transcript_delete_handler' + 'transcript_delete_handler', + 'transcript_upload_api', ] LOGGER = logging.getLogger(__name__) @@ -173,6 +177,39 @@ def transcript_download_handler(request): return response +def upload_transcript(request): + edx_video_id = request.POST['edx_video_id'] + language_code = request.POST['language_code'] + new_language_code = request.POST['new_language_code'] + transcript_file = request.FILES['file'] + try: + # Convert SRT transcript into an SJSON format + # and upload it to S3. + sjson_subs = Transcript.convert( + content=transcript_file.read().decode('utf-8'), + input_format=Transcript.SRT, + output_format=Transcript.SJSON + ).encode() + create_or_update_video_transcript( + video_id=edx_video_id, + language_code=language_code, + metadata={ + 'provider': TranscriptProvider.CUSTOM, + 'file_format': Transcript.SJSON, + 'language_code': new_language_code + }, + file_data=ContentFile(sjson_subs), + ) + response = JsonResponse(status=201) + except (TranscriptsGenerationException, UnicodeDecodeError): + response = JsonResponse( + {'error': _('There is a problem with this transcript file. Try to upload a different file.')}, + status=400 + ) + finally: + return response + + def validate_transcript_upload_data(data, files): """ Validates video transcript file. @@ -202,6 +239,30 @@ def validate_transcript_upload_data(data, files): return error +@api_view(['POST']) +@view_auth_classes() +@expect_json +def transcript_upload_api(request): + """ + API View for uploading transcript files. + + Arguments: + request: A WSGI request object + + Transcript file in SRT format + + Returns: + - A 400 if any validation fails + - A 200 if the transcript has been uploaded successfully + """ + error = validate_transcript_upload_data(data=request.POST, files=request.FILES) + if error: + response = JsonResponse({'error': error}, status=400) + else: + response = upload_transcript(request) + return response + + @login_required @require_POST def transcript_upload_handler(request): @@ -222,35 +283,7 @@ def transcript_upload_handler(request): if error: response = JsonResponse({'error': error}, status=400) else: - edx_video_id = request.POST['edx_video_id'] - language_code = request.POST['language_code'] - new_language_code = request.POST['new_language_code'] - transcript_file = request.FILES['file'] - try: - # Convert SRT transcript into an SJSON format - # and upload it to S3. - sjson_subs = Transcript.convert( - content=transcript_file.read().decode('utf-8'), - input_format=Transcript.SRT, - output_format=Transcript.SJSON - ).encode() - create_or_update_video_transcript( - video_id=edx_video_id, - language_code=language_code, - metadata={ - 'provider': TranscriptProvider.CUSTOM, - 'file_format': Transcript.SJSON, - 'language_code': new_language_code - }, - file_data=ContentFile(sjson_subs), - ) - response = JsonResponse(status=201) - except (TranscriptsGenerationException, UnicodeDecodeError): - response = JsonResponse( - {'error': _('There is a problem with this transcript file. Try to upload a different file.')}, - status=400 - ) - + response = upload_transcript(request) return response diff --git a/cms/urls.py b/cms/urls.py index 1934c0e201e5..10ca275e4f67 100644 --- a/cms/urls.py +++ b/cms/urls.py @@ -169,6 +169,7 @@ url(r'^transcript_delete/{}(?:/(?P[-\w]+))?(?:/(?P[^/]*))?$'.format( settings.COURSE_KEY_PATTERN ), contentstore_views.transcript_delete_handler, name='transcript_delete_handler'), + url(r'^transcript_upload_api/$', contentstore_views.transcript_upload_api, name='transcript_upload_api'), url(fr'^video_encodings_download/{settings.COURSE_KEY_PATTERN}$', contentstore_views.video_encodings_download, name='video_encodings_download'), url(fr'^group_configurations/{settings.COURSE_KEY_PATTERN}$', From ed2bfc844627d4c54ba414396f3be7cbec57495f Mon Sep 17 00:00:00 2001 From: Josh McLaughlin Date: Mon, 7 Jun 2021 13:01:42 -0700 Subject: [PATCH 2/2] Add log messages to transcript upload function --- cms/djangoapps/contentstore/views/transcript_settings.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cms/djangoapps/contentstore/views/transcript_settings.py b/cms/djangoapps/contentstore/views/transcript_settings.py index 430ce12a9ac5..7f06abea0162 100644 --- a/cms/djangoapps/contentstore/views/transcript_settings.py +++ b/cms/djangoapps/contentstore/views/transcript_settings.py @@ -202,11 +202,13 @@ def upload_transcript(request): ) response = JsonResponse(status=201) except (TranscriptsGenerationException, UnicodeDecodeError): + LOGGER.error("Unable to update transcript on edX video %s for language %s", edx_video_id, new_language_code) response = JsonResponse( {'error': _('There is a problem with this transcript file. Try to upload a different file.')}, status=400 ) finally: + LOGGER.info("Updated transcript on edX video %s for language %s", edx_video_id, new_language_code) return response