Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BD-27] FEAT: Add new API endpoint for uploading transcripts #27844

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 192 additions & 0 deletions cms/djangoapps/contentstore/views/tests/test_transcript_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from unittest.mock import ANY, Mock, patch

import ddt
from django.test.client import Client
from django.test.testcases import TestCase
from django.urls import reverse
from edxval import api
Expand All @@ -14,6 +15,7 @@
from cms.djangoapps.contentstore.utils import reverse_course_url
from common.djangoapps.student.roles import CourseStaffRole
from openedx.core.djangoapps.profile_images.tests.helpers import make_image_file
from openedx.core.djangoapps.oauth_dispatch.jwt import create_jwt_for_user

from ..transcript_settings import TranscriptionProviderErrorType, validate_transcript_credentials

Expand Down Expand Up @@ -550,3 +552,193 @@ def test_transcript_delete_handler(self, is_staff, is_course_staff):
))
self.assertEqual(response.status_code, 200)
self.assertFalse(api.get_video_transcript_data(video_id=video_id, language_code=language_code))


@ddt.ddt
class TranscriptUploadApiTest(CourseTestCase):
"""
Tests for the transcript upload API endpoint (the ``transcript_upload_api`` URL).

Every request is sent with a JWT ``Authorization`` header created for the
test user, unless a test overrides that header explicitly.
"""
def setUp(self):
super().setUp()
# Replace the test client with one that sends a JWT Authorization header
# for ``self.user`` by default on every request.
jwt_headers = {
'HTTP_AUTHORIZATION': 'JWT ' + create_jwt_for_user(self.user)
}
self.client = Client(**jwt_headers)

@property
def view_url(self):
"""
Returns the url of the transcript upload API view.
"""
return reverse('transcript_upload_api')

def test_401_without_authentication(self):
"""
Verify that a 401 is returned in case of an unauthenticated request.
"""
# Override the default JWT header with an empty value for this request.
response = self.client.post(self.view_url, content_type='application/json', HTTP_AUTHORIZATION='')
self.assertEqual(response.status_code, 401)

def test_405_with_not_allowed_request_method(self):
"""
Verify that 405 is returned in case of not-allowed request methods.
Allowed request methods include POST.
"""
response = self.client.get(self.view_url, content_type='application/json')
self.assertEqual(response.status_code, 405)

@patch('cms.djangoapps.contentstore.views.transcript_settings.create_or_update_video_transcript')
@patch(
'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages',
Mock(return_value=['en']),
)
def test_transcript_upload_handler(self, mock_create_or_update_video_transcript):
"""
Tests that the transcript upload handler works as expected: a valid SRT
upload returns a 201 and the transcript is saved as SJSON.
"""
transcript_file_stream = StringIO('0\n00:00:00,010 --> 00:00:00,100\nПривіт, edX вітає вас.\n\n')
# Make request to transcript upload handler
response = self.client.post(
self.view_url,
{
'edx_video_id': '123',
'language_code': 'en',
'new_language_code': 'es',
'file': transcript_file_stream,
},
format='multipart'
)

self.assertEqual(response.status_code, 201)
# The existing 'en' transcript is looked up by ``language_code`` while the
# new language code ('es') is passed in the metadata.
mock_create_or_update_video_transcript.assert_called_with(
video_id='123',
language_code='en',
metadata={
'language_code': 'es',
'file_format': 'sjson',
'provider': 'Custom'
},
file_data=ANY,
)

# Each case is a (request payload, expected error message) pair.
@ddt.data(
(
{
'edx_video_id': '123',
'language_code': 'en',
'new_language_code': 'en',
},
'A transcript file is required.'
),
(
{
'language_code': 'en',
'file': '0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
},
'The following parameters are required: edx_video_id, new_language_code.'
),
(
{
'language_code': 'en',
'new_language_code': 'en',
'file': '0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
},
'The following parameters are required: edx_video_id.'
),
(
{
'file': '0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
},
'The following parameters are required: edx_video_id, language_code, new_language_code.'
)
)
@ddt.unpack
@patch(
'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages',
Mock(return_value=['en']),
)
def test_transcript_upload_handler_missing_attrs(self, request_payload, expected_error_message):
"""
Tests the transcript upload handler when the required attributes are missing.
"""
# Make request to transcript upload handler
response = self.client.post(self.view_url, request_payload, format='multipart')
self.assertEqual(response.status_code, 400)
self.assertEqual(json.loads(response.content.decode('utf-8'))['error'], expected_error_message)

@patch(
'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages',
Mock(return_value=['en', 'es'])
)
def test_transcript_upload_handler_existing_transcript(self):
"""
Tests that the upload handler does not update a transcript's language if a
transcript with the same language is already present for an edx_video_id.
"""
# Make request to transcript upload handler
request_payload = {
'edx_video_id': '1234',
'language_code': 'en',
'new_language_code': 'es'
}
response = self.client.post(self.view_url, request_payload, format='multipart')
self.assertEqual(response.status_code, 400)
self.assertEqual(
json.loads(response.content.decode('utf-8'))['error'],
'A transcript with the "es" language code already exists.'
)

@patch(
'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages',
Mock(return_value=['en']),
)
def test_transcript_upload_handler_with_image(self):
"""
Tests the transcript upload handler with an image file.
"""
with make_image_file() as image_file:
# Make request to transcript upload handler
response = self.client.post(
self.view_url,
{
'edx_video_id': '123',
'language_code': 'en',
'new_language_code': 'es',
'file': image_file,
},
format='multipart'
)

self.assertEqual(response.status_code, 400)
self.assertEqual(
json.loads(response.content.decode('utf-8'))['error'],
'There is a problem with this transcript file. Try to upload a different file.'
)

@patch(
'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages',
Mock(return_value=['en']),
)
def test_transcript_upload_handler_with_invalid_transcript(self):
"""
Tests the transcript upload handler with an invalid transcript file.
"""
transcript_file_stream = StringIO('An invalid transcript SubRip file content')
# Make request to transcript upload handler
response = self.client.post(
self.view_url,
{
'edx_video_id': '123',
'language_code': 'en',
'new_language_code': 'es',
'file': transcript_file_stream,
},
format='multipart'
)

self.assertEqual(response.status_code, 400)
self.assertEqual(
json.loads(response.content.decode('utf-8'))['error'],
'There is a problem with this transcript file. Try to upload a different file.'
)
95 changes: 65 additions & 30 deletions cms/djangoapps/contentstore/views/transcript_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@
update_transcript_credentials_state_for_org
)
from opaque_keys.edx.keys import CourseKey
from rest_framework.decorators import api_view
from rest_framework.response import Response

from common.djangoapps.student.auth import has_studio_write_access
from common.djangoapps.util.json_request import JsonResponse, expect_json
from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag
from openedx.core.djangoapps.video_pipeline.api import update_3rd_party_transcription_service_credentials
from openedx.core.lib.api.view_utils import view_auth_classes
from xmodule.video_module.transcripts_utils import Transcript, TranscriptsGenerationException

from .videos import TranscriptProvider
Expand All @@ -33,7 +36,8 @@
'transcript_credentials_handler',
'transcript_download_handler',
'transcript_upload_handler',
'transcript_delete_handler'
'transcript_delete_handler',
'transcript_upload_api',
]

LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -173,6 +177,41 @@ def transcript_download_handler(request):
return response


def upload_transcript(request):
    """
    Convert an uploaded SRT transcript to SJSON and save it for a video.

    Reads ``edx_video_id``, ``language_code`` and ``new_language_code`` from
    ``request.POST`` and the transcript file from ``request.FILES['file']``.
    Missing keys are not handled here; the callers in this module run
    ``validate_transcript_upload_data`` on the request before calling this.

    Arguments:
        request: A WSGI request object

    Returns:
        - A 201 JsonResponse if the transcript was converted and saved
        - A 400 JsonResponse with an ``error`` message if the file could not
          be decoded as UTF-8 or converted from SRT
    """
    edx_video_id = request.POST['edx_video_id']
    language_code = request.POST['language_code']
    new_language_code = request.POST['new_language_code']
    transcript_file = request.FILES['file']
    try:
        # Convert SRT transcript into an SJSON format
        # and upload it to S3.
        sjson_subs = Transcript.convert(
            content=transcript_file.read().decode('utf-8'),
            input_format=Transcript.SRT,
            output_format=Transcript.SJSON
        ).encode()
        create_or_update_video_transcript(
            video_id=edx_video_id,
            language_code=language_code,
            metadata={
                'provider': TranscriptProvider.CUSTOM,
                'file_format': Transcript.SJSON,
                'language_code': new_language_code
            },
            file_data=ContentFile(sjson_subs),
        )
        response = JsonResponse(status=201)
    except (TranscriptsGenerationException, UnicodeDecodeError):
        # Log the failure with the video ID and transcript language so that
        # rejected uploads can be traced.
        LOGGER.error("Unable to update transcript on edX video %s for language %s", edx_video_id, new_language_code)
        response = JsonResponse(
            {'error': _('There is a problem with this transcript file. Try to upload a different file.')},
            status=400
        )
    else:
        # Log the success case to help monitoring. This lives in ``else`` rather
        # than ``finally`` so it is not emitted when the upload failed.
        LOGGER.info("Updated transcript on edX video %s for language %s", edx_video_id, new_language_code)
    return response


def validate_transcript_upload_data(data, files):
"""
Validates video transcript file.
Expand Down Expand Up @@ -202,6 +241,30 @@ def validate_transcript_upload_data(data, files):
return error


@api_view(['POST'])
@view_auth_classes()
@expect_json
def transcript_upload_api(request):
    """
    API view for uploading a transcript file for a video.

    The request data and files are first checked with
    ``validate_transcript_upload_data``; only a request that passes validation
    is handed to ``upload_transcript``.

    Arguments:
        request: A WSGI request object

        Transcript file in SRT format

    Returns:
        - A 400 if any validation fails
        - Otherwise, the response produced by ``upload_transcript`` (a 201 if
          the transcript has been uploaded successfully)
    """
    validation_error = validate_transcript_upload_data(data=request.POST, files=request.FILES)
    if validation_error:
        return JsonResponse({'error': validation_error}, status=400)
    return upload_transcript(request)


@login_required
@require_POST
def transcript_upload_handler(request):
Expand All @@ -222,35 +285,7 @@ def transcript_upload_handler(request):
if error:
response = JsonResponse({'error': error}, status=400)
else:
edx_video_id = request.POST['edx_video_id']
language_code = request.POST['language_code']
new_language_code = request.POST['new_language_code']
transcript_file = request.FILES['file']
try:
# Convert SRT transcript into an SJSON format
# and upload it to S3.
sjson_subs = Transcript.convert(
content=transcript_file.read().decode('utf-8'),
input_format=Transcript.SRT,
output_format=Transcript.SJSON
).encode()
create_or_update_video_transcript(
video_id=edx_video_id,
language_code=language_code,
metadata={
'provider': TranscriptProvider.CUSTOM,
'file_format': Transcript.SJSON,
'language_code': new_language_code
},
file_data=ContentFile(sjson_subs),
)
response = JsonResponse(status=201)
except (TranscriptsGenerationException, UnicodeDecodeError):
response = JsonResponse(
{'error': _('There is a problem with this transcript file. Try to upload a different file.')},
status=400
)

response = upload_transcript(request)
return response


Expand Down
1 change: 1 addition & 0 deletions cms/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@
url(r'^transcript_delete/{}(?:/(?P<edx_video_id>[-\w]+))?(?:/(?P<language_code>[^/]*))?$'.format(
settings.COURSE_KEY_PATTERN
), contentstore_views.transcript_delete_handler, name='transcript_delete_handler'),
url(r'^transcript_upload_api/$', contentstore_views.transcript_upload_api, name='transcript_upload_api'),
url(fr'^video_encodings_download/{settings.COURSE_KEY_PATTERN}$',
contentstore_views.video_encodings_download, name='video_encodings_download'),
url(fr'^group_configurations/{settings.COURSE_KEY_PATTERN}$',
Expand Down