diff --git a/cms/djangoapps/contentstore/helpers.py b/cms/djangoapps/contentstore/helpers.py index 1ceb2bc3c01d..32626f263dec 100644 --- a/cms/djangoapps/contentstore/helpers.py +++ b/cms/djangoapps/contentstore/helpers.py @@ -10,6 +10,7 @@ import re from attrs import frozen, Factory +from django.core.files.base import ContentFile from django.conf import settings from django.contrib.auth import get_user_model from django.utils.translation import gettext as _ @@ -23,6 +24,12 @@ from xmodule.exceptions import NotFoundError from xmodule.modulestore.django import modulestore from xmodule.xml_block import XmlMixin +from xmodule.video_block.transcripts_utils import Transcript, build_components_import_path +from edxval.api import ( + create_external_video, + create_or_update_video_transcript, + delete_video_transcript, +) from cms.djangoapps.models.settings.course_grading import CourseGradingModel from cms.lib.xblock.upstream_sync import UpstreamLink, UpstreamLinkException, fetch_customizable_fields @@ -274,8 +281,14 @@ def _insert_static_files_into_downstream_xblock( course_key=downstream_xblock.context_key, staged_content_id=staged_content_id, static_files=static_files, - usage_key=downstream_xblock.scope_ids.usage_id, + usage_key=downstream_xblock.usage_key, ) + if downstream_xblock.usage_key.block_type == 'video': + _import_transcripts( + downstream_xblock, + staged_content_id=staged_content_id, + static_files=static_files, + ) # Rewrite the OLX's static asset references to point to the new # locations for those assets. See _import_files_into_course for more @@ -331,6 +344,13 @@ def import_staged_content_from_user_clipboard(parent_key: UsageKey, request) -> tags=user_clipboard.content.tags, ) + usage_key = new_xblock.usage_key + if usage_key.block_type == 'video': + # The edx_video_id must always be new so as not + # to interfere with the data of the copied block + new_xblock.edx_video_id = create_external_video(display_name='external video') + store.update_item(new_xblock, request.user.id) + notices = _insert_static_files_into_downstream_xblock(new_xblock, user_clipboard.content.id, request) return new_xblock, notices @@ -630,8 +650,8 @@ def _import_file_into_course( # we're not going to attempt to change. if clipboard_file_path.startswith('static/'): # If it's in this form, it came from a library and assumes component-local assets - file_path = clipboard_file_path.lstrip('static/') - import_path = f"components/{usage_key.block_type}/{usage_key.block_id}/{file_path}" + file_path = clipboard_file_path.removeprefix('static/') + import_path = build_components_import_path(usage_key, file_path) filename = pathlib.Path(file_path).name new_key = course_key.make_asset_key("asset", import_path.replace("/", "_")) else: @@ -672,6 +692,62 @@ def _import_file_into_course( return False, {} +def _import_transcripts( + block: XBlock, + staged_content_id: int, + static_files: list[content_staging_api.StagedContentFileData], +): + """ + Adds transcripts to VAL using the new edx_video_id. + """ + for file_data_obj in static_files: + clipboard_file_path = file_data_obj.filename + data = content_staging_api.get_staged_content_static_file_data( + staged_content_id, + clipboard_file_path + ) + if data is None: + raise NotFoundError(file_data_obj.source_key) + + if clipboard_file_path.startswith('static/'): + # If it's in this form, it came from a library and assumes component-local assets + file_path = clipboard_file_path.removeprefix('static/') + else: + # Otherwise it came from a course... + file_path = clipboard_file_path + + filename = pathlib.Path(file_path).name + + language_code = next((k for k, v in block.transcripts.items() if v == filename), None) + if language_code: + sjson_subs = Transcript.convert( + content=data, + input_format=Transcript.SRT, + output_format=Transcript.SJSON + ).encode() + create_or_update_video_transcript( + video_id=block.edx_video_id, + language_code=language_code, + metadata={ + 'file_format': Transcript.SJSON, + 'language_code': language_code + }, + file_data=ContentFile(sjson_subs), + ) + + +def clear_transcripts(block: XBlock): + """ + Deletes all transcripts of a video block + """ + for language_code in block.transcripts.keys(): + delete_video_transcript( + video_id=block.edx_video_id, + language_code=language_code, + ) + block.transcripts = {} + + def is_item_in_course_tree(item): """ Check that the item is in the course tree. diff --git a/cms/djangoapps/contentstore/rest_api/v2/views/downstreams.py b/cms/djangoapps/contentstore/rest_api/v2/views/downstreams.py index 46e67e87ea0c..739f33d014f1 100644 --- a/cms/djangoapps/contentstore/rest_api/v2/views/downstreams.py +++ b/cms/djangoapps/contentstore/rest_api/v2/views/downstreams.py @@ -73,7 +73,9 @@ UpstreamLink, UpstreamLinkException, NoUpstream, BadUpstream, BadDownstream, fetch_customizable_fields, sync_from_upstream, decline_sync, sever_upstream_link ) -from cms.djangoapps.contentstore.helpers import import_static_assets_for_library_sync +from cms.djangoapps.contentstore.helpers import ( + import_static_assets_for_library_sync, clear_transcripts +) from common.djangoapps.student.auth import has_studio_write_access, has_studio_read_access from openedx.core.lib.api.view_utils import ( DeveloperErrorViewMixin, @@ -198,6 +200,9 @@ def post(self, request: _AuthenticatedRequest, usage_key_string: str) -> Respons """ downstream = _load_accessible_block(request.user, usage_key_string, require_write_access=True) try: + if downstream.usage_key.block_type == "video": + # Delete all transcripts so we can copy new ones from upstream + clear_transcripts(downstream) upstream = sync_from_upstream(downstream, request.user) static_file_notices = import_static_assets_for_library_sync(downstream, upstream, request) except UpstreamLinkException as exc: diff --git a/cms/djangoapps/contentstore/views/tests/test_transcripts.py b/cms/djangoapps/contentstore/views/tests/test_transcripts.py index 95fbaccbab7c..61c0bd81b9c8 100644 --- a/cms/djangoapps/contentstore/views/tests/test_transcripts.py +++ b/cms/djangoapps/contentstore/views/tests/test_transcripts.py @@ -15,9 +15,11 @@ from django.urls import reverse from edxval.api import create_video from opaque_keys.edx.keys import UsageKey +from organizations.tests.factories import OrganizationFactory from cms.djangoapps.contentstore.tests.utils import CourseTestCase, setup_caption_responses from openedx.core.djangoapps.contentserver.caching import del_cached_content +from openedx.core.djangoapps.content_libraries import api as lib_api from xmodule.contentstore.content import StaticContent # lint-amnesty, pylint: disable=wrong-import-order from xmodule.contentstore.django import contentstore # lint-amnesty, pylint: disable=wrong-import-order from xmodule.exceptions import NotFoundError # lint-amnesty, pylint: disable=wrong-import-order @@ -27,8 +29,10 @@ GetTranscriptsFromYouTubeException, Transcript, get_video_transcript_content, - remove_subs_from_store + get_transcript, + remove_subs_from_store, ) +from openedx.core.djangoapps.xblock import api as xblock_api TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE) TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex @@ -92,6 +96,21 @@ def setUp(self): resp = self.client.ajax_post('/xblock/', data) self.assertEqual(resp.status_code, 200) + self.library = lib_api.create_library( + org=OrganizationFactory.create(short_name="org1"), + slug="lib", + title="Library", + ) + self.library_block_metadata = lib_api.create_library_block( + self.library.key, + "video", + "video-transcript", + ) + self.library_block = xblock_api.load_block( + self.library_block_metadata.usage_key, + self.user, + ) + self.video_usage_key = self._get_usage_key(resp) self.item = modulestore().get_item(self.video_usage_key) # hI10vDNYz4M - valid Youtube ID with transcripts. @@ -702,6 +721,25 @@ def test_replace_transcript_success(self, edx_video_id): expected_sjson_content = json.loads(SJSON_TRANSCRIPT_CONTENT) self.assertDictEqual(actual_sjson_content, expected_sjson_content) + def test_replace_transcript_library_content_success(self): + # Make call to replace transcripts from youtube + response = self.replace_transcript(self.library_block_metadata.usage_key, self.youtube_id) + + # Verify the response + self.assert_response(response, expected_status_code=200, expected_message='Success') + + # Obtain updated block + updated_block = xblock_api.load_block( + self.library_block_metadata.usage_key, + self.user, + ) + + # Verify transcript content + transcript = get_transcript(updated_block, 'en', Transcript.SJSON) + actual_sjson_content = json.loads(transcript[0]) + expected_sjson_content = json.loads(SJSON_TRANSCRIPT_CONTENT) + self.assertDictEqual(actual_sjson_content, expected_sjson_content) + def test_replace_transcript_fails_without_data(self): """ Verify that replace transcript fails if we do not provide video data in request. diff --git a/cms/djangoapps/contentstore/views/transcripts_ajax.py b/cms/djangoapps/contentstore/views/transcripts_ajax.py index 8cb7f455013b..f98457a008f6 100644 --- a/cms/djangoapps/contentstore/views/transcripts_ajax.py +++ b/cms/djangoapps/contentstore/views/transcripts_ajax.py @@ -19,7 +19,8 @@ from django.utils.translation import gettext as _ from edxval.api import create_external_video, create_or_update_video_transcript from opaque_keys import InvalidKeyError -from opaque_keys.edx.keys import UsageKey +from opaque_keys.edx.keys import UsageKey, UsageKeyV2 +from opaque_keys.edx.locator import LibraryLocatorV2 from cms.djangoapps.contentstore.video_storage_handlers import TranscriptProvider from common.djangoapps.student.auth import has_course_author_access @@ -43,6 +44,9 @@ get_transcript_link_from_youtube, get_transcript_links_from_youtube, ) +from openedx.core.djangoapps.content_libraries import api as lib_api +from openedx.core.djangoapps.xblock import api as xblock_api +from openedx.core.djangoapps.xblock.data import CheckPerm __all__ = [ 'upload_transcripts', @@ -87,6 +91,45 @@ def link_video_to_component(video_component, user): return edx_video_id +def save_video_transcript_in_learning_core( + usage_key, + input_format, + transcript_content, + language_code +): + """ + Saves a video transcript to the learning core + + Arguments: + usage_key: UsageKey of the block + input_format: Input transcript format for content being passed. + transcript_content: Content of the transcript file + language_code: transcript language code + + Returns: + result: A boolean indicating whether the transcript was saved or not. + video_key: Key used in video filename + """ + try: + srt_content = Transcript.convert( + content=transcript_content, + input_format=input_format, + output_format=Transcript.SRT + ).encode() + + filename = f"static/transcript-{language_code}.srt" + lib_api.add_library_block_static_asset_file( + usage_key, + filename, + srt_content, + ) + result = True + except (TranscriptsGenerationException, UnicodeDecodeError): + result = False + + return result + + def save_video_transcript(edx_video_id, input_format, transcript_content, language_code): """ Saves a video transcript to the VAL and its content to the configured django storage(DS). @@ -118,6 +161,7 @@ def save_video_transcript(edx_video_id, input_format, transcript_content, langua }, file_data=ContentFile(sjson_subs), ) + result = True except (TranscriptsGenerationException, UnicodeDecodeError): result = False @@ -145,6 +189,7 @@ def validate_video_block(request, locator): item = _get_item(request, {'locator': locator}) if item.category != 'video': error = _('Transcripts are supported only for "video" blocks.') + except (InvalidKeyError, ItemNotFoundError): error = _('Cannot find item by locator.') @@ -319,61 +364,38 @@ def check_transcripts(request): # lint-amnesty, pylint: disable=too-many-statem get_transcript_from_val(edx_video_id=edx_video_id, lang='en') command = 'found' except NotFoundError: - filename = f'subs_{item.sub}.srt.sjson' - content_location = StaticContent.compute_location(item.location.course_key, filename) - try: - local_transcripts = contentstore().find(content_location).data.decode('utf-8') - transcripts_presence['current_item_subs'] = item.sub - except NotFoundError: - pass - # Check for youtube transcripts presence youtube_id = videos.get('youtube', None) if youtube_id: - transcripts_presence['is_youtube_mode'] = True + _check_youtube_transcripts( + transcripts_presence, + youtube_id, + item, + ) - # youtube local - filename = f'subs_{youtube_id}.srt.sjson' + if not isinstance(item.usage_key, UsageKeyV2): + filename = f'subs_{item.sub}.srt.sjson' content_location = StaticContent.compute_location(item.location.course_key, filename) try: - local_transcripts = contentstore().find(content_location).data.decode('utf-8') - transcripts_presence['youtube_local'] = True + contentstore().find(content_location).data.decode('utf-8') + transcripts_presence['current_item_subs'] = item.sub except NotFoundError: - log.debug("Can't find transcripts in storage for youtube id: %s", youtube_id) + pass - if get_transcript_link_from_youtube(youtube_id): - transcripts_presence['youtube_server'] = True - #check youtube local and server transcripts for equality - if transcripts_presence['youtube_server'] and transcripts_presence['youtube_local']: + # Check for html5 local transcripts presence + html5_subs = [] + for html5_id in videos['html5']: + filename = f'subs_{html5_id}.srt.sjson' + content_location = StaticContent.compute_location(item.location.course_key, filename) try: - transcript_links = get_transcript_links_from_youtube( - youtube_id, - settings, - item.runtime.service(item, "i18n") + html5_subs.append(contentstore().find(content_location).data) + transcripts_presence['html5_local'].append(html5_id) + except NotFoundError: + log.debug("Can't find transcripts in storage for non-youtube video_id: %s", html5_id) + if len(html5_subs) == 2: # check html5 transcripts for equality + transcripts_presence['html5_equal'] = ( + json.loads(html5_subs[0].decode('utf-8')) == json.loads(html5_subs[1].decode('utf-8')) ) - for (_, link) in transcript_links.items(): - youtube_server_subs = get_transcript_from_youtube( - link, youtube_id, item.runtime.service(item, "i18n") - ) - if json.loads(local_transcripts) == youtube_server_subs: # check transcripts for equality - transcripts_presence['youtube_diff'] = False - except GetTranscriptsFromYouTubeException: - pass - - # Check for html5 local transcripts presence - html5_subs = [] - for html5_id in videos['html5']: - filename = f'subs_{html5_id}.srt.sjson' - content_location = StaticContent.compute_location(item.location.course_key, filename) - try: - html5_subs.append(contentstore().find(content_location).data) - transcripts_presence['html5_local'].append(html5_id) - except NotFoundError: - log.debug("Can't find transcripts in storage for non-youtube video_id: %s", html5_id) - if len(html5_subs) == 2: # check html5 transcripts for equality - transcripts_presence['html5_equal'] = ( - json.loads(html5_subs[0].decode('utf-8')) == json.loads(html5_subs[1].decode('utf-8')) - ) command, __ = _transcripts_logic(transcripts_presence, videos) @@ -381,6 +403,43 @@ def check_transcripts(request): # lint-amnesty, pylint: disable=too-many-statem return JsonResponse(transcripts_presence) +def _check_youtube_transcripts(transcripts_presence, youtube_id, item): + """ + Check for youtube transcripts presence + """ + transcripts_presence['is_youtube_mode'] = True + + if get_transcript_link_from_youtube(youtube_id): + transcripts_presence['youtube_server'] = True + + if not isinstance(item.usage_key, UsageKeyV2): + # youtube local + filename = f'subs_{youtube_id}.srt.sjson' + content_location = StaticContent.compute_location(item.location.course_key, filename) + try: + local_transcripts = contentstore().find(content_location).data.decode('utf-8') + transcripts_presence['youtube_local'] = True + except NotFoundError: + log.debug("Can't find transcripts in storage for youtube id: %s", youtube_id) + + # check youtube local and server transcripts for equality + if transcripts_presence['youtube_server'] and transcripts_presence['youtube_local']: + try: + transcript_links = get_transcript_links_from_youtube( + youtube_id, + settings, + item.runtime.service(item, "i18n") + ) + for (_, link) in transcript_links.items(): + youtube_server_subs = get_transcript_from_youtube( + link, youtube_id, item.runtime.service(item, "i18n") + ) + if json.loads(local_transcripts) == youtube_server_subs: # check transcripts for equality + transcripts_presence['youtube_diff'] = False + except GetTranscriptsFromYouTubeException: + pass + + def _transcripts_logic(transcripts_presence, videos): """ By `transcripts_presence` content, figure what show to user: @@ -447,7 +506,7 @@ def _validate_transcripts_data(request): data: dict, loaded json from request, videos: parsed `data` to useful format, - item: video item from storage + item: video item from storage or library Raises `TranscriptsRequestValidationException` if validation is unsuccessful or `PermissionDenied` if user has no access. @@ -529,6 +588,7 @@ def choose_transcripts(request): Or error in case of validation failures. """ error, validated_data = validate_transcripts_request(request, include_html5=True) + edx_video_id = None if error: response = error_response({}, error) else: @@ -546,10 +606,24 @@ def choose_transcripts(request): return error_response({}, _('No such transcript.')) # 2. Link a video to video component if its not already linked to one. - edx_video_id = link_video_to_component(video, request.user) + if not isinstance(video.usage_key.context_key, LibraryLocatorV2): + edx_video_id = link_video_to_component(video, request.user) # 3. Upload the retrieved transcript to DS for the linked video ID. - success = save_video_transcript(edx_video_id, input_format, transcript_content, language_code='en') + if isinstance(video.usage_key.context_key, LibraryLocatorV2): + success = save_video_transcript_in_learning_core( + video.usage_key, + input_format, + transcript_content, + language_code='en', + ) + else: + success = save_video_transcript( + edx_video_id, + input_format, + transcript_content, + language_code='en', + ) if success: response = JsonResponse({'edx_video_id': edx_video_id, 'status': 'Success'}, status=200) else: @@ -569,6 +643,7 @@ def rename_transcripts(request): Or error in case of validation failures. """ error, validated_data = validate_transcripts_request(request) + edx_video_id = None if error: response = error_response({}, error) else: @@ -585,10 +660,24 @@ def rename_transcripts(request): return error_response({}, _('No such transcript.')) # 2. Link a video to video component if its not already linked to one. - edx_video_id = link_video_to_component(video, request.user) + if not isinstance(video.usage_key.context_key, LibraryLocatorV2): + edx_video_id = link_video_to_component(video, request.user) # 3. Upload the retrieved transcript to DS for the linked video ID. - success = save_video_transcript(edx_video_id, input_format, transcript_content, language_code='en') + if isinstance(video.usage_key.context_key, LibraryLocatorV2): + success = save_video_transcript_in_learning_core( + video.usage_key, + input_format, + transcript_content, + language_code='en', + ) + else: + success = save_video_transcript( + edx_video_id, + input_format, + transcript_content, + language_code='en', + ) if success: response = JsonResponse({'edx_video_id': edx_video_id, 'status': 'Success'}, status=200) else: @@ -610,6 +699,7 @@ def replace_transcripts(request): """ error, validated_data = validate_transcripts_request(request, include_yt=True) youtube_id = validated_data['youtube'] + edx_video_id = None if error: response = error_response({}, error) elif not youtube_id: @@ -623,16 +713,34 @@ def replace_transcripts(request): return error_response({}, str(e)) # 2. Link a video to video component if its not already linked to one. - edx_video_id = link_video_to_component(video, request.user) - - # for transcript in transcript_links: + if not isinstance(video.usage_key.context_key, LibraryLocatorV2): + edx_video_id = link_video_to_component(video, request.user) # 3. Upload YT transcript to DS for the linked video ID. success = True for transcript in transcript_content: [language_code, json_content] = transcript - success = save_video_transcript(edx_video_id, Transcript.SJSON, json_content, language_code) + if isinstance(video.usage_key.context_key, LibraryLocatorV2): + success = save_video_transcript_in_learning_core( + video.usage_key, + Transcript.SJSON, + json_content, + language_code, + ) + filename = f"transcript-{language_code}.srt" + else: + success = save_video_transcript( + edx_video_id, + Transcript.SJSON, + json_content, + language_code, + ) + filename = f"{edx_video_id}-{language_code}.srt" + if not success: + break + video.transcripts[language_code] = filename if success: + video.save() response = JsonResponse({'edx_video_id': edx_video_id, 'status': 'Success'}, status=200) else: response = error_response({}, _('There is a problem with the YouTube transcript file.')) @@ -643,17 +751,25 @@ def replace_transcripts(request): def _get_item(request, data): """ Obtains from 'data' the locator for an item. - Next, gets that item from the modulestore (allowing any errors to raise up). + Next, gets that item from the modulestore (allowing any errors to raise up) + or from library API if is a library content. Finally, verifies that the user has access to the item. - Returns the item. + Returns the item and a boolean if is a library content. """ usage_key = UsageKey.from_string(data.get('locator')) - if not usage_key.context_key.is_course: - # TODO: implement transcript support for learning core / content libraries. - raise TranscriptsRequestValidationException(_('Transcripts are not yet supported in content libraries.')) + + context_key = usage_key.context_key + if not context_key.is_course: + if isinstance(context_key, LibraryLocatorV2): + return xblock_api.load_block( + usage_key, + request.user, + check_permission=CheckPerm.CAN_EDIT, + ) + raise TranscriptsRequestValidationException(_('Transcripts are not yet supported for this type of block')) # This is placed before has_course_author_access() to validate the location, - # because has_course_author_access() raises r if location is invalid. + # because has_course_author_access() raises error if location is invalid. item = modulestore().get_item(usage_key) # use the item's course_key, because the usage_key might not have the run diff --git a/cms/lib/xblock/upstream_sync.py b/cms/lib/xblock/upstream_sync.py index 8a12ea8fc045..22cad3c6d36a 100644 --- a/cms/lib/xblock/upstream_sync.py +++ b/cms/lib/xblock/upstream_sync.py @@ -297,7 +297,11 @@ def _update_non_customizable_fields(*, upstream: XBlock, downstream: XBlock) -> """ syncable_fields = _get_synchronizable_fields(upstream, downstream) customizable_fields = set(downstream.get_customizable_fields().keys()) + isVideoBlock = downstream.usage_key.block_type == "video" for field_name in syncable_fields - customizable_fields: + if isVideoBlock and field_name == 'edx_video_id': + # Avoid overwriting edx_video_id between blocks + continue new_upstream_value = getattr(upstream, field_name) setattr(downstream, field_name, new_upstream_value) diff --git a/openedx/core/djangoapps/xblock/rest_api/views.py b/openedx/core/djangoapps/xblock/rest_api/views.py index 0fd202b4eff2..273bc3ffec3e 100644 --- a/openedx/core/djangoapps/xblock/rest_api/views.py +++ b/openedx/core/djangoapps/xblock/rest_api/views.py @@ -178,7 +178,7 @@ def xblock_handler( """ # To support sandboxed XBlocks, custom frontends, and other use cases, we # authenticate requests using a secure token in the URL. see - # openedx.core.djangoapps.xblock.utils.get_secure_hash_for_xblock_handler + # openedx.core.djangoapps.xblock.utils.get_secure_token_for_xblock_handler # for details and rationale. if not validate_secure_token_for_xblock_handler(user_id, str(usage_key), secure_token): raise PermissionDenied("Invalid/expired auth token.") diff --git a/xmodule/tests/test_video.py b/xmodule/tests/test_video.py index 5e95f77082b1..e98c78244db5 100644 --- a/xmodule/tests/test_video.py +++ b/xmodule/tests/test_video.py @@ -741,6 +741,48 @@ def test_export_to_xml(self, mock_val_api): course_id=self.block.scope_ids.usage_id.context_key, ) + def test_export_to_xml_without_video_id(self): + """ + Test that we write the correct XML on export of a video without edx_video_id. + """ + self.block.youtube_id_0_75 = 'izygArpw-Qo' + self.block.youtube_id_1_0 = 'p2Q6BrNhdh8' + self.block.youtube_id_1_25 = '1EeWXzPdhSA' + self.block.youtube_id_1_5 = 'rABDYkeK0x8' + self.block.show_captions = False + self.block.start_time = datetime.timedelta(seconds=1.0) + self.block.end_time = datetime.timedelta(seconds=60) + self.block.track = 'http://www.example.com/track' + self.block.handout = 'http://www.example.com/handout' + self.block.download_track = True + self.block.html5_sources = ['http://www.example.com/source.mp4', 'http://www.example.com/source1.ogg'] + self.block.download_video = True + self.block.transcripts = {'ua': 'ukrainian_translation.srt', 'ge': 'german_translation.srt'} + + xml = self.block.definition_to_xml(self.file_system) + parser = etree.XMLParser(remove_blank_text=True) + xml_string = '''\ + + ''' + expected = etree.XML(xml_string, parser=parser) + self.assertXmlEqual(expected, xml) + @patch('xmodule.video_block.video_block.edxval_api') def test_export_to_xml_val_error(self, mock_val_api): # Export should succeed without VAL data if video does not exist diff --git a/xmodule/video_block/transcripts_utils.py b/xmodule/video_block/transcripts_utils.py index 866edf596812..4dda003b1f47 100644 --- a/xmodule/video_block/transcripts_utils.py +++ b/xmodule/video_block/transcripts_utils.py @@ -20,6 +20,7 @@ from opaque_keys.edx.keys import UsageKeyV2 from pysrt import SubRipFile, SubRipItem, SubRipTime from pysrt.srtexc import Error +from opaque_keys.edx.locator import LibraryLocatorV2 from openedx.core.djangoapps.xblock.api import get_component_from_usage_key from xmodule.contentstore.content import StaticContent @@ -498,16 +499,17 @@ def manage_video_subtitles_save(item, user, old_metadata=None, generate_translat remove_subs_from_store(video_id, item, lang) reraised_message = '' - for lang in new_langs: # 3b - try: - generate_sjson_for_all_speeds( - item, - item.transcripts[lang], - {speed: subs_id for subs_id, speed in youtube_speed_dict(item).items()}, - lang, - ) - except TranscriptException: - pass + if not isinstance(item.usage_key.context_key, LibraryLocatorV2): + for lang in new_langs: # 3b + try: + generate_sjson_for_all_speeds( + item, + item.transcripts[lang], + {speed: subs_id for subs_id, speed in youtube_speed_dict(item).items()}, + lang, + ) + except TranscriptException: + pass if reraised_message: item.save_with_metadata(user) raise TranscriptException(reraised_message) @@ -1040,6 +1042,13 @@ def get_transcript_from_contentstore(video, language, output_format, transcripts return transcript_content, transcript_name, Transcript.mime_types[output_format] +def build_components_import_path(usage_key, file_path): + """ + Build components import path + """ + return f"components/{usage_key.block_type}/{usage_key.block_id}/{file_path}" + + def get_transcript_from_learning_core(video_block, language, output_format, transcripts_info): """ Get video transcript from Learning Core (used for Content Libraries) diff --git a/xmodule/video_block/video_block.py b/xmodule/video_block/video_block.py index 84d7edcf7263..30af789506ed 100644 --- a/xmodule/video_block/video_block.py +++ b/xmodule/video_block/video_block.py @@ -855,11 +855,15 @@ def definition_to_xml(self, resource_fs): # lint-amnesty, pylint: disable=too-m if new_transcripts.get('en'): xml.set('sub', '') - # Update `transcripts` attribute in the xml - xml.set('transcripts', json.dumps(transcripts, sort_keys=True)) - except edxval_api.ValVideoNotFoundError: pass + else: + if transcripts.get('en'): + xml.set('sub', '') + + if transcripts: + # Update `transcripts` attribute in the xml + xml.set('transcripts', json.dumps(transcripts, sort_keys=True)) # Sorting transcripts for easy testing of resulting xml for transcript_language in sorted(transcripts.keys()): diff --git a/xmodule/video_block/video_handlers.py b/xmodule/video_block/video_handlers.py index b7857e881ece..6d136b8b6869 100644 --- a/xmodule/video_block/video_handlers.py +++ b/xmodule/video_block/video_handlers.py @@ -13,13 +13,14 @@ from django.core.files.base import ContentFile from django.utils.timezone import now from edxval.api import create_external_video, create_or_update_video_transcript, delete_video_transcript -from opaque_keys.edx.locator import CourseLocator +from opaque_keys.edx.locator import CourseLocator, LibraryLocatorV2 from webob import Response from xblock.core import XBlock from xblock.exceptions import JsonHandlerError from xmodule.exceptions import NotFoundError from xmodule.fields import RelativeTime +from openedx.core.djangoapps.content_libraries import api as lib_api from .transcripts_utils import ( Transcript, @@ -467,7 +468,6 @@ def validate_transcript_upload_data(self, data): return error - # pylint: disable=too-many-statements @XBlock.handler def studio_transcript(self, request, dispatch): """ @@ -495,118 +495,179 @@ def studio_transcript(self, request, dispatch): no SRT extension or not parse-able by PySRT UnicodeDecodeError: non-UTF8 uploaded file content encoding. """ - _ = self.runtime.service(self, "i18n").ugettext - if dispatch.startswith('translation'): if request.method == 'POST': - error = self.validate_transcript_upload_data(data=request.POST) - if error: - response = Response(json={'error': error}, status=400) - else: - edx_video_id = clean_video_id(request.POST['edx_video_id']) - language_code = request.POST['language_code'] - new_language_code = request.POST['new_language_code'] - transcript_file = request.POST['file'].file - - if not edx_video_id: - # Back-populate the video ID for an external video. - # pylint: disable=attribute-defined-outside-init - self.edx_video_id = edx_video_id = create_external_video(display_name='external video') - - try: - # Convert SRT transcript into an SJSON format - # and upload it to S3. - sjson_subs = Transcript.convert( - content=transcript_file.read().decode('utf-8'), - input_format=Transcript.SRT, - output_format=Transcript.SJSON - ).encode() - create_or_update_video_transcript( - video_id=edx_video_id, - language_code=language_code, - metadata={ - 'file_format': Transcript.SJSON, - 'language_code': new_language_code - }, - file_data=ContentFile(sjson_subs), - ) - payload = { - 'edx_video_id': edx_video_id, - 'language_code': new_language_code - } - # If a new transcript is added, then both new_language_code and - # language_code fields will have the same value. - if language_code != new_language_code: - self.transcripts.pop(language_code, None) - self.transcripts[new_language_code] = f'{edx_video_id}-{new_language_code}.srt' - response = Response(json.dumps(payload), status=201) - except (TranscriptsGenerationException, UnicodeDecodeError): - response = Response( - json={ - 'error': _( - 'There is a problem with this transcript file. Try to upload a different file.' - ) - }, - status=400 - ) + response = self._studio_transcript_upload(request) elif request.method == 'DELETE': - request_data = request.json + response = self._studio_transcript_delete(request) + elif request.method == 'GET': + response = self._studio_transcript_get(request) + else: + # Any other HTTP method is not allowed. + response = Response(status=404) - if 'lang' not in request_data or 'edx_video_id' not in request_data: - return Response(status=400) + else: # unknown dispatch + log.debug("Dispatch is not allowed") + response = Response(status=404) - language = request_data['lang'] - edx_video_id = clean_video_id(request_data['edx_video_id']) + return response - if edx_video_id: - delete_video_transcript(video_id=edx_video_id, language_code=language) + def _save_transcript_field(self): + """ + Save `transcripts` block field. + """ + field = self.fields['transcripts'] + if self.transcripts: + transcripts_copy = self.transcripts.copy() + # Need to delete to overwrite, it's weird behavior, + # but it only works like this. + field.delete_from(self) + field.write_to(self, transcripts_copy) + else: + field.delete_from(self) - if language == 'en': - # remove any transcript file from content store for the video ids - possible_sub_ids = [ - self.sub, # pylint: disable=access-member-before-definition - self.youtube_id_1_0 - ] + get_html5_ids(self.html5_sources) - for sub_id in possible_sub_ids: - remove_subs_from_store(sub_id, self, language) + def _studio_transcript_upload(self, request): + """ + Upload transcript. Used in "POST" method in `studio_transcript` + """ + _ = self.runtime.service(self, "i18n").ugettext + error = self.validate_transcript_upload_data(data=request.POST) + if error: + response = Response(json={'error': error}, status=400) + else: + edx_video_id = clean_video_id(request.POST['edx_video_id']) + language_code = request.POST['language_code'] + new_language_code = request.POST['new_language_code'] + transcript_file = request.POST['file'].file - # update metadata as `en` can also be present in `transcripts` field - remove_subs_from_store(self.transcripts.pop(language, None), self, language) + isLibrary = isinstance(self.usage_key.context_key, LibraryLocatorV2) - # also empty `sub` field - self.sub = '' # pylint: disable=attribute-defined-outside-init + if isLibrary: + filename = f'transcript-{new_language_code}.srt' + else: + if not edx_video_id: + # Back-populate the video ID for an external video. + # pylint: disable=attribute-defined-outside-init + self.edx_video_id = edx_video_id = create_external_video(display_name='external video') + filename = f'{edx_video_id}-{new_language_code}.srt' + + try: + # Convert SRT transcript into an SJSON format + # and upload it to S3. + content = transcript_file.read() + payload = { + 'edx_video_id': edx_video_id, + 'language_code': new_language_code + } + if isLibrary: + # Save transcript as static asset in Learning Core if is a library component + filename = f"static/{filename}" + lib_api.add_library_block_static_asset_file( + self.usage_key, + filename, + content, + ) else: - remove_subs_from_store(self.transcripts.pop(language, None), self, language) + sjson_subs = Transcript.convert( + content=content.decode('utf-8'), + input_format=Transcript.SRT, + output_format=Transcript.SJSON + ).encode() + create_or_update_video_transcript( + video_id=edx_video_id, + language_code=language_code, + metadata={ + 'file_format': Transcript.SJSON, + 'language_code': new_language_code + }, + file_data=ContentFile(sjson_subs), + ) - return Response(status=200) + # If a new transcript is added, then both new_language_code and + # language_code fields will have the same value. + if language_code != new_language_code: + self.transcripts.pop(language_code, None) + self.transcripts[new_language_code] = filename - elif request.method == 'GET': - language = request.GET.get('language_code') - if not language: - return Response(json={'error': _('Language is required.')}, status=400) + if isLibrary: + self._save_transcript_field() + response = Response(json.dumps(payload), status=201) + except (TranscriptsGenerationException, UnicodeDecodeError): + response = Response( + json={ + 'error': _( + 'There is a problem with this transcript file. Try to upload a different file.' + ) + }, + status=400 + ) + return response - try: - transcript_content, transcript_name, mime_type = get_transcript( - video=self, lang=language, output_format=Transcript.SRT - ) - response = Response(transcript_content, headerlist=[ - ( - 'Content-Disposition', - f'attachment; filename="{transcript_name}"' - ), - ('Content-Language', language), - ('Content-Type', mime_type) - ]) - except (UnicodeDecodeError, TranscriptsGenerationException, NotFoundError): - response = Response(status=404) + def _studio_transcript_delete(self, request): + """ + Delete transcript. Used in "DELETE" method in `studio_transcript` + """ + request_data = request.json + + if 'lang' not in request_data or 'edx_video_id' not in request_data: + return Response(status=400) + + language = request_data['lang'] + edx_video_id = clean_video_id(request_data['edx_video_id']) + if edx_video_id: + delete_video_transcript(video_id=edx_video_id, language_code=language) + + if isinstance(self.usage_key.context_key, LibraryLocatorV2): + transcript_name = self.transcripts.pop(language, None) + if transcript_name: + lib_api.delete_library_block_static_asset_file( + self.usage_key, + f"static/{transcript_name}", + ) + self._save_transcript_field() + else: + if language == 'en': + # remove any transcript file from content store for the video ids + possible_sub_ids = [ + self.sub, # pylint: disable=access-member-before-definition + self.youtube_id_1_0 + ] + get_html5_ids(self.html5_sources) + for sub_id in possible_sub_ids: + remove_subs_from_store(sub_id, self, language) + + # update metadata as `en` can also be present in `transcripts` field + remove_subs_from_store(self.transcripts.pop(language, None), self, language) + + # also empty `sub` field + self.sub = '' # pylint: disable=attribute-defined-outside-init else: - # Any other HTTP method is not allowed. - response = Response(status=404) + remove_subs_from_store(self.transcripts.pop(language, None), self, language) - else: # unknown dispatch - log.debug("Dispatch is not allowed") - response = Response(status=404) + return Response(status=200) + def _studio_transcript_get(self, request): + """ + Get transcript. Used in "GET" method in `studio_transcript` + """ + _ = self.runtime.service(self, "i18n").ugettext + language = request.GET.get('language_code') + if not language: + return Response(json={'error': _('Language is required.')}, status=400) + + try: + transcript_content, transcript_name, mime_type = get_transcript( + video=self, lang=language, output_format=Transcript.SRT + ) + response = Response(transcript_content, headerlist=[ + ( + 'Content-Disposition', + f'attachment; filename="{transcript_name}"' + ), + ('Content-Language', language), + ('Content-Type', mime_type) + ]) + except (UnicodeDecodeError, TranscriptsGenerationException, NotFoundError): + response = Response(status=404) return response