diff --git a/RELEASENOTES.md b/RELEASENOTES.md index b6a0ea00a8..7fb528214a 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -71,6 +71,9 @@ * Fix `IllegalStateException` when an SSA file contains a cue with zero duration (start and end time equal) ([#2052](https://github.com/androidx/media/issues/2052)). + * Suppress (and log) subtitle parsing errors when subtitles are muxed into + the same container as audio and video + ([#2052](https://github.com/androidx/media/issues/2052)). * Metadata: * Image: * DataSource: diff --git a/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/e2etest/SubtitlePlaybackTest.java b/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/e2etest/SubtitlePlaybackTest.java index 9f3bfaa4c7..696933d011 100644 --- a/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/e2etest/SubtitlePlaybackTest.java +++ b/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/e2etest/SubtitlePlaybackTest.java @@ -307,6 +307,40 @@ public void onLoadError( applicationContext, playbackOutput, "playbackdumps/subtitles/sideloaded-error.mp4.dump"); } + // TODO: b/391362063 - Assert this error is propagated once non-fatal propagation is implemented. 
+ @Test + public void muxedSubtitleParsingError_playbackContinues() throws Exception { + Context applicationContext = ApplicationProvider.getApplicationContext(); + CapturingRenderersFactory capturingRenderersFactory = + new CapturingRenderersFactory(applicationContext); + ExoPlayer player = + new ExoPlayer.Builder(applicationContext, capturingRenderersFactory) + .setClock(new FakeClock(/* isAutoAdvancing= */ true)) + .setMediaSourceFactory( + new DefaultMediaSourceFactory(applicationContext) + .setSubtitleParserFactory( + new ThrowingSubtitleParserFactory( + () -> new IllegalStateException("test subtitle parsing error")))) + .build(); + Surface surface = new Surface(new SurfaceTexture(/* texName= */ 1)); + player.setVideoSurface(surface); + PlaybackOutput playbackOutput = PlaybackOutput.register(player, capturingRenderersFactory); + MediaItem mediaItem = + new MediaItem.Builder().setUri("asset:///media/mkv/sample_with_srt.mkv").build(); + + player.setMediaItem(mediaItem); + player.prepare(); + run(player).untilState(Player.STATE_READY); + run(player).untilFullyBuffered(); + player.play(); + run(player).untilState(Player.STATE_ENDED); + player.release(); + surface.release(); + + DumpFileAsserts.assertOutput( + applicationContext, playbackOutput, "playbackdumps/subtitles/muxed-parsing-error.mkv.dump"); + } + /** * An {@link ExtractorsFactory} which creates a {@link FragmentedMp4Extractor} configured to * extract a single additional caption track. 
diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleTranscodingExtractorOutput.java b/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleTranscodingExtractorOutput.java index faff23b5e3..46b250b796 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleTranscodingExtractorOutput.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleTranscodingExtractorOutput.java @@ -46,6 +46,8 @@ public final class SubtitleTranscodingExtractorOutput implements ExtractorOutput private final SubtitleParser.Factory subtitleParserFactory; private final SparseArray textTrackOutputs; + private boolean hasNonTextTracks; + public SubtitleTranscodingExtractorOutput( ExtractorOutput delegate, SubtitleParser.Factory subtitleParserFactory) { this.delegate = delegate; @@ -64,6 +66,7 @@ public void resetSubtitleParsers() { @Override public TrackOutput track(int id, @C.TrackType int type) { if (type != C.TRACK_TYPE_TEXT) { + hasNonTextTracks = true; return delegate.track(id, type); } SubtitleTranscodingTrackOutput existingTrackOutput = textTrackOutputs.get(id); @@ -79,6 +82,11 @@ public TrackOutput track(int id, @C.TrackType int type) { @Override public void endTracks() { delegate.endTracks(); + if (hasNonTextTracks) { + for (int i = 0; i < textTrackOutputs.size(); i++) { + textTrackOutputs.valueAt(i).shouldSuppressParsingErrors(true); + } + } } @Override diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleTranscodingTrackOutput.java b/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleTranscodingTrackOutput.java index d0ad9ff1ef..f132ae7443 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleTranscodingTrackOutput.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleTranscodingTrackOutput.java @@ -26,6 +26,7 @@ import androidx.media3.common.DataReader; import 
androidx.media3.common.Format; import androidx.media3.common.MimeTypes; +import androidx.media3.common.util.Log; import androidx.media3.common.util.ParsableByteArray; import androidx.media3.common.util.Util; import androidx.media3.extractor.TrackOutput; @@ -40,6 +41,8 @@ */ /* package */ final class SubtitleTranscodingTrackOutput implements TrackOutput { + private static final String TAG = "SubtitleTranscodingTO"; + private final TrackOutput delegate; private final SubtitleParser.Factory subtitleParserFactory; private final CueEncoder cueEncoder; @@ -50,6 +53,7 @@ private byte[] sampleData; @Nullable private SubtitleParser currentSubtitleParser; private @MonotonicNonNull Format currentFormat; + private boolean shouldSuppressParsingErrors; public SubtitleTranscodingTrackOutput( TrackOutput delegate, SubtitleParser.Factory subtitleParserFactory) { @@ -68,6 +72,16 @@ public void resetSubtitleParser() { } } + /** + * Sets whether to suppress parsing errors thrown during the transcoding in {@link + * #sampleMetadata}. + * + *

 <p>Defaults to {@code false}. + */ + public void shouldSuppressParsingErrors(boolean shouldSuppressParsingErrors) { + this.shouldSuppressParsingErrors = shouldSuppressParsingErrors; + } + // TrackOutput implementation @Override @@ -143,12 +157,21 @@ public void sampleMetadata( checkArgument(cryptoData == null, "DRM on subtitles is not supported"); int sampleStart = sampleDataEnd - offset - size; - currentSubtitleParser.parse( - sampleData, - sampleStart, - size, - SubtitleParser.OutputOptions.allCues(), - cuesWithTiming -> outputSample(cuesWithTiming, timeUs, flags)); + try { + currentSubtitleParser.parse( + sampleData, + sampleStart, + size, + SubtitleParser.OutputOptions.allCues(), + cuesWithTiming -> outputSample(cuesWithTiming, timeUs, flags)); + } catch (RuntimeException e) { + if (shouldSuppressParsingErrors) { + // TODO: b/391362063 - Propagate this error out in a non-fatal way. + Log.w(TAG, "Parsing subtitles failed, ignoring sample.", e); + } else { + throw e; + } + } sampleDataStart = sampleStart + size; if (sampleDataStart == sampleDataEnd) { // The array is now empty, so we can move the start and end pointers back to the start. 
diff --git a/libraries/test_data/src/test/assets/playbackdumps/subtitles/muxed-parsing-error.mkv.dump b/libraries/test_data/src/test/assets/playbackdumps/subtitles/muxed-parsing-error.mkv.dump new file mode 100644 index 0000000000..f203114532 --- /dev/null +++ b/libraries/test_data/src/test/assets/playbackdumps/subtitles/muxed-parsing-error.mkv.dump @@ -0,0 +1,528 @@ +MediaCodecAdapter (exotest.audio.ac3): + inputBuffers: + count = 30 + input buffer #0: + timeUs = 1000000062000 + contents = length 416, hash 211F2286 + input buffer #1: + timeUs = 1000000097000 + contents = length 418, hash 77425A86 + input buffer #2: + timeUs = 1000000131000 + contents = length 418, hash A0FE5CA1 + input buffer #3: + timeUs = 1000000166000 + contents = length 418, hash 2309B066 + input buffer #4: + timeUs = 1000000201000 + contents = length 418, hash 928A653B + input buffer #5: + timeUs = 1000000236000 + contents = length 418, hash 3422F0CB + input buffer #6: + timeUs = 1000000270000 + contents = length 418, hash EFF43D5B + input buffer #7: + timeUs = 1000000306000 + contents = length 418, hash FC8093C7 + input buffer #8: + timeUs = 1000000341000 + contents = length 418, hash CCC08A16 + input buffer #9: + timeUs = 1000000376000 + contents = length 418, hash 2A6EE863 + input buffer #10: + timeUs = 1000000410000 + contents = length 418, hash D69A9251 + input buffer #11: + timeUs = 1000000445000 + contents = length 418, hash BCFB758D + input buffer #12: + timeUs = 1000000480000 + contents = length 418, hash 11B66799 + input buffer #13: + timeUs = 1000000514000 + contents = length 418, hash C824D392 + input buffer #14: + timeUs = 1000000550000 + contents = length 418, hash C167D872 + input buffer #15: + timeUs = 1000000585000 + contents = length 418, hash 4221C855 + input buffer #16: + timeUs = 1000000620000 + contents = length 418, hash 4D4FF934 + input buffer #17: + timeUs = 1000000654000 + contents = length 418, hash 984AA025 + input buffer #18: + timeUs = 1000000690000 + contents = 
length 418, hash BB788B46 + input buffer #19: + timeUs = 1000000724000 + contents = length 418, hash 9EFBFD97 + input buffer #20: + timeUs = 1000000759000 + contents = length 418, hash DF1A460C + input buffer #21: + timeUs = 1000000793000 + contents = length 418, hash 2BDB56A + input buffer #22: + timeUs = 1000000829000 + contents = length 418, hash CA230060 + input buffer #23: + timeUs = 1000000864000 + contents = length 418, hash D2F19F41 + input buffer #24: + timeUs = 1000000898000 + contents = length 418, hash AF392D79 + input buffer #25: + timeUs = 1000000932000 + contents = length 418, hash C5D7F2A3 + input buffer #26: + timeUs = 1000000968000 + contents = length 418, hash 733A35AE + input buffer #27: + timeUs = 1000001002000 + contents = length 418, hash DE46E5D3 + input buffer #28: + timeUs = 1000001037000 + contents = length 418, hash 56AB8D37 + input buffer #29: + timeUs = 0 + flags = 4 + contents = length 0, hash 1 + outputBuffers: + count = 29 + output buffer #0: + timeUs = 1000000062000 + size = 0 + rendered = false + output buffer #1: + timeUs = 1000000097000 + size = 0 + rendered = false + output buffer #2: + timeUs = 1000000131000 + size = 0 + rendered = false + output buffer #3: + timeUs = 1000000166000 + size = 0 + rendered = false + output buffer #4: + timeUs = 1000000201000 + size = 0 + rendered = false + output buffer #5: + timeUs = 1000000236000 + size = 0 + rendered = false + output buffer #6: + timeUs = 1000000270000 + size = 0 + rendered = false + output buffer #7: + timeUs = 1000000306000 + size = 0 + rendered = false + output buffer #8: + timeUs = 1000000341000 + size = 0 + rendered = false + output buffer #9: + timeUs = 1000000376000 + size = 0 + rendered = false + output buffer #10: + timeUs = 1000000410000 + size = 0 + rendered = false + output buffer #11: + timeUs = 1000000445000 + size = 0 + rendered = false + output buffer #12: + timeUs = 1000000480000 + size = 0 + rendered = false + output buffer #13: + timeUs = 1000000514000 + 
size = 0 + rendered = false + output buffer #14: + timeUs = 1000000550000 + size = 0 + rendered = false + output buffer #15: + timeUs = 1000000585000 + size = 0 + rendered = false + output buffer #16: + timeUs = 1000000620000 + size = 0 + rendered = false + output buffer #17: + timeUs = 1000000654000 + size = 0 + rendered = false + output buffer #18: + timeUs = 1000000690000 + size = 0 + rendered = false + output buffer #19: + timeUs = 1000000724000 + size = 0 + rendered = false + output buffer #20: + timeUs = 1000000759000 + size = 0 + rendered = false + output buffer #21: + timeUs = 1000000793000 + size = 0 + rendered = false + output buffer #22: + timeUs = 1000000829000 + size = 0 + rendered = false + output buffer #23: + timeUs = 1000000864000 + size = 0 + rendered = false + output buffer #24: + timeUs = 1000000898000 + size = 0 + rendered = false + output buffer #25: + timeUs = 1000000932000 + size = 0 + rendered = false + output buffer #26: + timeUs = 1000000968000 + size = 0 + rendered = false + output buffer #27: + timeUs = 1000001002000 + size = 0 + rendered = false + output buffer #28: + timeUs = 1000001037000 + size = 0 + rendered = false +MediaCodecAdapter (exotest.video.avc): + inputBuffers: + count = 31 + input buffer #0: + timeUs = 1000000000000 + contents = length 36477, hash F0F36CFE + input buffer #1: + timeUs = 1000000067000 + contents = length 5341, hash 40B85E2 + input buffer #2: + timeUs = 1000000033000 + contents = length 596, hash 357B4D92 + input buffer #3: + timeUs = 1000000200000 + contents = length 7704, hash A39EDA06 + input buffer #4: + timeUs = 1000000133000 + contents = length 989, hash 2813C72D + input buffer #5: + timeUs = 1000000100000 + contents = length 721, hash C50D1C73 + input buffer #6: + timeUs = 1000000167000 + contents = length 519, hash 65FE1911 + input buffer #7: + timeUs = 1000000333000 + contents = length 6160, hash E1CAC0EC + input buffer #8: + timeUs = 1000000267000 + contents = length 953, hash 7160C661 + input 
buffer #9: + timeUs = 1000000233000 + contents = length 620, hash 7A7AE07C + input buffer #10: + timeUs = 1000000300000 + contents = length 405, hash 5CC7F4E7 + input buffer #11: + timeUs = 1000000433000 + contents = length 4852, hash 9DB6979D + input buffer #12: + timeUs = 1000000400000 + contents = length 547, hash E31A6979 + input buffer #13: + timeUs = 1000000367000 + contents = length 570, hash FEC40D00 + input buffer #14: + timeUs = 1000000567000 + contents = length 5525, hash 7C478F7E + input buffer #15: + timeUs = 1000000500000 + contents = length 1082, hash DA07059A + input buffer #16: + timeUs = 1000000467000 + contents = length 807, hash 93478E6B + input buffer #17: + timeUs = 1000000533000 + contents = length 744, hash 9A8E6026 + input buffer #18: + timeUs = 1000000700000 + contents = length 4732, hash C73B23C0 + input buffer #19: + timeUs = 1000000633000 + contents = length 1004, hash 8A19A228 + input buffer #20: + timeUs = 1000000600000 + contents = length 794, hash 8126022C + input buffer #21: + timeUs = 1000000667000 + contents = length 645, hash F08300E5 + input buffer #22: + timeUs = 1000000833000 + contents = length 2684, hash 727FE378 + input buffer #23: + timeUs = 1000000767000 + contents = length 787, hash 419A7821 + input buffer #24: + timeUs = 1000000733000 + contents = length 649, hash 5C159346 + input buffer #25: + timeUs = 1000000800000 + contents = length 509, hash F912D655 + input buffer #26: + timeUs = 1000000967000 + contents = length 1226, hash 29815C21 + input buffer #27: + timeUs = 1000000900000 + contents = length 898, hash D997AD0A + input buffer #28: + timeUs = 1000000867000 + contents = length 476, hash A0423645 + input buffer #29: + timeUs = 1000000933000 + contents = length 486, hash DDF32CBB + input buffer #30: + timeUs = 0 + flags = 4 + contents = length 0, hash 1 + outputBuffers: + count = 30 + output buffer #0: + timeUs = 1000000000000 + size = 36477 + rendered = true + output buffer #1: + timeUs = 1000000067000 + size = 
5341 + rendered = true + output buffer #2: + timeUs = 1000000033000 + size = 596 + rendered = true + output buffer #3: + timeUs = 1000000200000 + size = 7704 + rendered = true + output buffer #4: + timeUs = 1000000133000 + size = 989 + rendered = true + output buffer #5: + timeUs = 1000000100000 + size = 721 + rendered = true + output buffer #6: + timeUs = 1000000167000 + size = 519 + rendered = true + output buffer #7: + timeUs = 1000000333000 + size = 6160 + rendered = true + output buffer #8: + timeUs = 1000000267000 + size = 953 + rendered = true + output buffer #9: + timeUs = 1000000233000 + size = 620 + rendered = true + output buffer #10: + timeUs = 1000000300000 + size = 405 + rendered = true + output buffer #11: + timeUs = 1000000433000 + size = 4852 + rendered = true + output buffer #12: + timeUs = 1000000400000 + size = 547 + rendered = true + output buffer #13: + timeUs = 1000000367000 + size = 570 + rendered = true + output buffer #14: + timeUs = 1000000567000 + size = 5525 + rendered = true + output buffer #15: + timeUs = 1000000500000 + size = 1082 + rendered = true + output buffer #16: + timeUs = 1000000467000 + size = 807 + rendered = true + output buffer #17: + timeUs = 1000000533000 + size = 744 + rendered = true + output buffer #18: + timeUs = 1000000700000 + size = 4732 + rendered = true + output buffer #19: + timeUs = 1000000633000 + size = 1004 + rendered = true + output buffer #20: + timeUs = 1000000600000 + size = 794 + rendered = true + output buffer #21: + timeUs = 1000000667000 + size = 645 + rendered = true + output buffer #22: + timeUs = 1000000833000 + size = 2684 + rendered = true + output buffer #23: + timeUs = 1000000767000 + size = 787 + rendered = true + output buffer #24: + timeUs = 1000000733000 + size = 649 + rendered = true + output buffer #25: + timeUs = 1000000800000 + size = 509 + rendered = true + output buffer #26: + timeUs = 1000000967000 + size = 1226 + rendered = true + output buffer #27: + timeUs = 1000000900000 + 
size = 898 + rendered = true + output buffer #28: + timeUs = 1000000867000 + size = 476 + rendered = true + output buffer #29: + timeUs = 1000000933000 + size = 486 + rendered = true +AudioSink: + buffer count = 29 + config: + pcmEncoding = 2 + channelCount = 1 + sampleRate = 44100 + buffer #0: + time = 1000000062000 + data = 1 + buffer #1: + time = 1000000097000 + data = 1 + buffer #2: + time = 1000000131000 + data = 1 + buffer #3: + time = 1000000166000 + data = 1 + buffer #4: + time = 1000000201000 + data = 1 + buffer #5: + time = 1000000236000 + data = 1 + buffer #6: + time = 1000000270000 + data = 1 + buffer #7: + time = 1000000306000 + data = 1 + buffer #8: + time = 1000000341000 + data = 1 + buffer #9: + time = 1000000376000 + data = 1 + buffer #10: + time = 1000000410000 + data = 1 + buffer #11: + time = 1000000445000 + data = 1 + buffer #12: + time = 1000000480000 + data = 1 + buffer #13: + time = 1000000514000 + data = 1 + buffer #14: + time = 1000000550000 + data = 1 + buffer #15: + time = 1000000585000 + data = 1 + buffer #16: + time = 1000000620000 + data = 1 + buffer #17: + time = 1000000654000 + data = 1 + buffer #18: + time = 1000000690000 + data = 1 + buffer #19: + time = 1000000724000 + data = 1 + buffer #20: + time = 1000000759000 + data = 1 + buffer #21: + time = 1000000793000 + data = 1 + buffer #22: + time = 1000000829000 + data = 1 + buffer #23: + time = 1000000864000 + data = 1 + buffer #24: + time = 1000000898000 + data = 1 + buffer #25: + time = 1000000932000 + data = 1 + buffer #26: + time = 1000000968000 + data = 1 + buffer #27: + time = 1000001002000 + data = 1 + buffer #28: + time = 1000001037000 + data = 1 +TextOutput: + Subtitle[0]: + presentationTimeUs = 0 + Cues = []