Skip to content

Commit

Permalink
Fix preroll sample handling for non-keyframe media start positions
Browse files Browse the repository at this point in the history
When processing edit lists in MP4 files, the media start position may be a non-keyframe. To ensure proper playback, the decoder must preroll to the preceding keyframe, as these preroll samples are essential for decoding but are not rendered.

Issue: google/ExoPlayer#1659

#cherrypick

PiperOrigin-RevId: 673457615
  • Loading branch information
rohitjoins authored and copybara-github committed Sep 11, 2024
1 parent d9a6784 commit f133e8d
Show file tree
Hide file tree
Showing 18 changed files with 978 additions and 421 deletions.
3 changes: 3 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
* Transformer:
* Track Selection:
* Extractors:
* Fix preroll sample handling for non-keyframe media start positions when
processing edit lists in MP4 files
([#1659](https://github.com/google/ExoPlayer/issues/1659)).
* DataSource:
* Audio:
* Video:
Expand Down
24 changes: 24 additions & 0 deletions libraries/common/src/main/java/androidx/media3/common/Format.java
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ public static final class Builder {
@Nullable private List<byte[]> initializationData;
@Nullable private DrmInitData drmInitData;
private long subsampleOffsetUs;
private boolean hasPrerollSamples;

// Video specific.

Expand Down Expand Up @@ -256,6 +257,7 @@ private Builder(Format format) {
this.initializationData = format.initializationData;
this.drmInitData = format.drmInitData;
this.subsampleOffsetUs = format.subsampleOffsetUs;
this.hasPrerollSamples = format.hasPrerollSamples;
// Video specific.
this.width = format.width;
this.height = format.height;
Expand Down Expand Up @@ -543,6 +545,18 @@ public Builder setSubsampleOffsetUs(long subsampleOffsetUs) {
return this;
}

/**
* Sets {@link Format#hasPrerollSamples}. The default value is {@code false}.
*
* @param hasPrerollSamples The {@link Format#hasPrerollSamples}.
* @return The builder.
*/
@CanIgnoreReturnValue
public Builder setHasPrerollSamples(boolean hasPrerollSamples) {
this.hasPrerollSamples = hasPrerollSamples;
return this;
}

// Video specific.

/**
Expand Down Expand Up @@ -952,6 +966,15 @@ public Format build() {
*/
@UnstableApi public final long subsampleOffsetUs;

/**
* Indicates whether the stream contains preroll samples.
*
* <p>When this field is set to {@code true}, it means that the stream includes decode-only
* samples that occur before the intended playback start position. These samples are necessary for
* decoding but are not meant to be rendered and should be skipped after decoding.
*/
@UnstableApi public final boolean hasPrerollSamples;

// Video specific.

/** The width of the video in pixels, or {@link #NO_VALUE} if unknown or not applicable. */
Expand Down Expand Up @@ -1092,6 +1115,7 @@ private Format(Builder builder) {
builder.initializationData == null ? Collections.emptyList() : builder.initializationData;
drmInitData = builder.drmInitData;
subsampleOffsetUs = builder.subsampleOffsetUs;
hasPrerollSamples = builder.hasPrerollSamples;
// Video specific.
width = builder.width;
height = builder.height;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ interface Listener {

private boolean seenFirstTrackSelection;
private boolean notifyDiscontinuity;
private boolean pendingInitialDiscontinuity;
private int enabledTrackCount;
private boolean isLengthKnown;

Expand Down Expand Up @@ -295,6 +296,7 @@ public long selectTracks(
Assertions.checkState(!trackEnabledStates[track]);
enabledTrackCount++;
trackEnabledStates[track] = true;
pendingInitialDiscontinuity |= selection.getSelectedFormat().hasPrerollSamples;
streams[i] = new SampleStreamImpl(track);
streamResetFlags[i] = true;
// If there's still a chance of avoiding a seek, try and seek within the sample queue.
Expand All @@ -312,6 +314,7 @@ public long selectTracks(
if (enabledTrackCount == 0) {
pendingDeferredRetry = false;
notifyDiscontinuity = false;
pendingInitialDiscontinuity = false;
if (loader.isLoading()) {
// Discard as much as we can synchronously.
for (SampleQueue sampleQueue : sampleQueues) {
Expand Down Expand Up @@ -387,6 +390,11 @@ public long getNextLoadPositionUs() {

@Override
public long readDiscontinuity() {
if (pendingInitialDiscontinuity) {
pendingInitialDiscontinuity = false;
return lastSeekPositionUs;
}

if (notifyDiscontinuity
&& (loadingFinished || getExtractedSamplesCount() > extractedSamplesCountAtStartOfLoad)) {
notifyDiscontinuity = false;
Expand Down Expand Up @@ -451,6 +459,7 @@ && seekInsideBufferUs(trackIsAudioVideoFlags, positionUs)) {
pendingDeferredRetry = false;
pendingResetPositionUs = positionUs;
loadingFinished = false;
pendingInitialDiscontinuity = false;
if (loader.isLoading()) {
// Discard as much as we can synchronously.
for (SampleQueue sampleQueue : sampleQueues) {
Expand Down Expand Up @@ -810,6 +819,7 @@ private void maybeFinishPrepare() {
}
trackFormat = trackFormat.copyWithCryptoType(drmSessionManager.getCryptoType(trackFormat));
trackArray[i] = new TrackGroup(/* id= */ Integer.toString(i), trackFormat);
pendingInitialDiscontinuity |= trackFormat.hasPrerollSamples;
}
trackState = new TrackState(new TrackGroupArray(trackArray), trackIsAudioVideoFlags);
if (isSingleSample && durationUs == C.TIME_UNSET) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -714,9 +714,8 @@ public static TrackSampleTable parseStbl(
// sorted, but will ensure that a) all sync frames are in-order and b) any out-of-order
// frames are after their respective sync frames. This means that although the result of
// this binary search might be slightly incorrect (due to out-of-order timestamps), the loop
// below that walks forward to find the next sync frame will result in a correct start
// index. The start index would also be correct if we walk backwards to the previous sync
// frame (https://github.com/google/ExoPlayer/issues/1659).
// below that walks backward to find the previous sync frame will result in a correct start
// index.
startIndices[i] =
Util.binarySearchFloor(
timestamps, editMediaTime, /* inclusive= */ true, /* stayInBounds= */ true);
Expand All @@ -726,13 +725,8 @@ public static TrackSampleTable parseStbl(
editMediaTime + editDuration,
/* inclusive= */ omitZeroDurationClippedSample,
/* stayInBounds= */ false);
while (startIndices[i] < endIndices[i]
&& (flags[startIndices[i]] & C.BUFFER_FLAG_KEY_FRAME) == 0) {
// Applying the edit correctly would require prerolling from the previous sync sample. In
// the current implementation we advance to the next sync sample instead. Only other
// tracks (i.e. audio) will be rendered until the time of the first sync sample.
// See https://github.com/google/ExoPlayer/issues/1659.
startIndices[i]++;
while (startIndices[i] >= 0 && (flags[startIndices[i]] & C.BUFFER_FLAG_KEY_FRAME) == 0) {
startIndices[i]--;
}
editedSampleCount += endIndices[i] - startIndices[i];
copyMetadata |= nextSampleIndex != startIndices[i];
Expand All @@ -749,6 +743,7 @@ public static TrackSampleTable parseStbl(
long[] editedTimestamps = new long[editedSampleCount];
long pts = 0;
int sampleIndex = 0;
boolean hasPrerollSamples = false;
for (int i = 0; i < track.editListDurations.length; i++) {
long editMediaTime = track.editListMediaTimes[i];
int startIndex = startIndices[i];
Expand All @@ -764,8 +759,8 @@ public static TrackSampleTable parseStbl(
long timeInSegmentUs =
Util.scaleLargeTimestamp(
timestamps[j] - editMediaTime, C.MICROS_PER_SECOND, track.timescale);
if (canTrimSamplesWithTimestampChange(track.type)) {
timeInSegmentUs = max(0, timeInSegmentUs);
if (timeInSegmentUs < 0) {
hasPrerollSamples = true;
}
editedTimestamps[sampleIndex] = ptsUs + timeInSegmentUs;
if (copyMetadata && editedSizes[sampleIndex] > editedMaximumSize) {
Expand All @@ -777,6 +772,10 @@ public static TrackSampleTable parseStbl(
}
long editedDurationUs =
Util.scaleLargeTimestamp(pts, C.MICROS_PER_SECOND, track.movieTimescale);
if (hasPrerollSamples) {
Format format = track.format.buildUpon().setHasPrerollSamples(true).build();
track = track.copyWithFormat(format);
}
return new TrackSampleTable(
track,
editedOffsets,
Expand All @@ -787,12 +786,6 @@ public static TrackSampleTable parseStbl(
editedDurationUs);
}

private static boolean canTrimSamplesWithTimestampChange(@C.TrackType int trackType) {
// Audio samples have an inherent duration and we can't trim data by changing the sample
// timestamp alone.
return trackType != C.TRACK_TYPE_AUDIO;
}

@Nullable
private static Metadata parseUdtaMeta(ParsableByteArray meta, int limit) {
meta.skipBytes(Mp4Box.HEADER_SIZE);
Expand Down
Loading

0 comments on commit f133e8d

Please sign in to comment.