diff --git a/doc/classes/VideoStreamPlayer.xml b/doc/classes/VideoStreamPlayer.xml index f903f171d10..46bc4f491d6 100644 --- a/doc/classes/VideoStreamPlayer.xml +++ b/doc/classes/VideoStreamPlayer.xml @@ -16,7 +16,6 @@ The length of the current stream, in seconds. - [b]Note:[/b] For [VideoStreamTheora] streams (the built-in format supported by Godot), this value will always be zero, as getting the stream length is not implemented yet. The feature may be supported by video formats implemented by a GDExtension add-on. @@ -79,7 +78,6 @@ The current position of the stream, in seconds. - [b]Note:[/b] Changing this value won't have any effect as seeking is not implemented yet, except in video formats implemented by a GDExtension add-on. Audio volume as a linear value. diff --git a/modules/theora/video_stream_theora.cpp b/modules/theora/video_stream_theora.cpp index 8c7d639a6b9..0c7014d2cd2 100644 --- a/modules/theora/video_stream_theora.cpp +++ b/modules/theora/video_stream_theora.cpp @@ -39,41 +39,17 @@ int VideoStreamPlaybackTheora::buffer_data() { char *buffer = ogg_sync_buffer(&oy, 4096); -#ifdef THEORA_USE_THREAD_STREAMING - - int read; - - do { - thread_sem->post(); - read = MIN(ring_buffer.data_left(), 4096); - if (read) { - ring_buffer.read((uint8_t *)buffer, read); - ogg_sync_wrote(&oy, read); - } else { - OS::get_singleton()->delay_usec(100); - } - - } while (read == 0); - - return read; - -#else - uint64_t bytes = file->get_buffer((uint8_t *)buffer, 4096); ogg_sync_wrote(&oy, bytes); - return (bytes); - -#endif + return bytes; } int VideoStreamPlaybackTheora::queue_page(ogg_page *page) { - if (theora_p) { - ogg_stream_pagein(&to, page); - if (to.e_o_s) { - theora_eos = true; - } + ogg_stream_pagein(&to, page); + if (to.e_o_s) { + theora_eos = true; } - if (vorbis_p) { + if (has_audio) { ogg_stream_pagein(&vo, page); if (vo.e_o_s) { vorbis_eos = true; @@ -82,126 +58,243 @@ int VideoStreamPlaybackTheora::queue_page(ogg_page *page) { return 0; } -void VideoStreamPlaybackTheora::video_write() { - th_ycbcr_buffer yuv; - th_decode_ycbcr_out(td, yuv); +int VideoStreamPlaybackTheora::read_page(ogg_page *page) { + int ret = 0; + + while (ret <= 0) { + ret = ogg_sync_pageout(&oy, page); + if (ret <= 0) { + int bytes = buffer_data(); + if (bytes == 0) { + return 0; + } + } + } - int pitch = 4; - frame_data.resize(size.x * size.y * pitch); - { - uint8_t *w = frame_data.ptrw(); - char *dst = (char *)w; + return ret; +} - if (px_fmt == TH_PF_444) { - yuv444_2_rgb8888((uint8_t *)dst, (uint8_t *)yuv[0].data, (uint8_t *)yuv[1].data, (uint8_t *)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x << 2); +double VideoStreamPlaybackTheora::get_page_time(ogg_page *page) { + uint64_t granulepos = ogg_page_granulepos(page); + int page_serialno = ogg_page_serialno(page); + double page_time = -1; - } else if (px_fmt == TH_PF_422) { - yuv422_2_rgb8888((uint8_t *)dst, (uint8_t *)yuv[0].data, (uint8_t *)yuv[1].data, (uint8_t *)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x << 2); + if (page_serialno == to.serialno) { + page_time = th_granule_time(td, granulepos); + } + if (has_audio && page_serialno == vo.serialno) { + page_time = vorbis_granule_time(&vd, granulepos); + } - } else if (px_fmt == TH_PF_420) { - yuv420_2_rgb8888((uint8_t *)dst, (uint8_t *)yuv[0].data, (uint8_t *)yuv[1].data, (uint8_t *)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x << 2); + return page_time; +} + +// Read one buffer worth of pages and feed them to the streams. +int VideoStreamPlaybackTheora::feed_pages() { + int pages = 0; + ogg_page og; + + while (pages == 0) { + while (ogg_sync_pageout(&oy, &og) > 0) { + queue_page(&og); + pages++; } + if (pages == 0) { + int bytes = buffer_data(); + if (bytes == 0) { + break; + } + } + } - format = Image::FORMAT_RGBA8; + return pages; +} + +// Seek the video and audio streams simultaneously to find the granulepos where we should start decoding. +// It will return the position where we should start reading pages, and the video and audio granulepos. +int64_t VideoStreamPlaybackTheora::seek_streams(double p_time, int64_t &cur_video_granulepos, int64_t &cur_audio_granulepos) { + // Backtracking less than this is probably a waste of time. + const int64_t min_seek = 512 * 1024; + int64_t target_video_granulepos; + int64_t target_audio_granulepos; + double target_time = 0; + int64_t seek_pos; + + // Make a guess where we should start reading in the file, and scan from there. + // We base the guess on the mean bitrate of the streams. It would be theoretically faster to use the bisect method but + // in practice there's a lot of linear scanning to do to find the right pages. + // We want to catch the previous keyframe to the seek time. Since we only know the max GOP, we use that. + if (p_time == -1) { // This is a special case to find the last packets and calculate the video length. + seek_pos = MAX(stream_data_size - min_seek, stream_data_offset); + target_video_granulepos = INT64_MAX; + target_audio_granulepos = INT64_MAX; + } else { + int64_t video_frame = (int64_t)(p_time / frame_duration); + target_video_granulepos = MAX(1LL, video_frame - (1LL << ti.keyframe_granule_shift)) << ti.keyframe_granule_shift; + target_audio_granulepos = 0; + seek_pos = MAX(((target_video_granulepos >> ti.keyframe_granule_shift) - 1) * frame_duration * stream_data_size / stream_length, stream_data_offset); + target_time = th_granule_time(td, target_video_granulepos); + if (has_audio) { + target_audio_granulepos = video_frame * frame_duration * vi.rate; + target_time = MIN(target_time, vorbis_granule_time(&vd, target_audio_granulepos)); + } } - Ref img = memnew(Image(size.x, size.y, false, Image::FORMAT_RGBA8, frame_data)); //zero copy image creation + int64_t video_seek_pos = seek_pos; + int64_t audio_seek_pos = seek_pos; + double backtrack_time = 0; + bool video_catch = false; + bool audio_catch = false; + int64_t last_video_granule_seek_pos = seek_pos; + int64_t last_audio_granule_seek_pos = seek_pos; - texture->update(img); //zero copy send to rendering server + cur_video_granulepos = -1; + cur_audio_granulepos = -1; - frames_pending = 1; + while (!video_catch || (has_audio && !audio_catch)) { // Backtracking loop + if (seek_pos < stream_data_offset) { + seek_pos = stream_data_offset; + } + file->seek(seek_pos); + ogg_sync_reset(&oy); + + backtrack_time = 0; + last_video_granule_seek_pos = seek_pos; + last_audio_granule_seek_pos = seek_pos; + while (!video_catch || (has_audio && !audio_catch)) { // Page scanning loop + ogg_page page; + uint64_t last_seek_pos = file->get_position() - oy.fill + oy.returned; + int ret = read_page(&page); + if (ret <= 0) { // End of file. + if (seek_pos < stream_data_offset) { // We've already searched the whole file + return -1; + } + seek_pos -= min_seek; + break; + } + int64_t cur_granulepos = ogg_page_granulepos(&page); + if (cur_granulepos >= 0) { + int page_serialno = ogg_page_serialno(&page); + if (!video_catch && page_serialno == to.serialno) { + if (cur_granulepos >= target_video_granulepos) { + video_catch = true; + if (cur_video_granulepos < 0) { + // Adding 1s helps catching the start of the page and avoids backtrack_time = 0. + backtrack_time = MAX(backtrack_time, 1 + th_granule_time(td, cur_granulepos) - target_time); + } + } else { + video_seek_pos = last_video_granule_seek_pos; + cur_video_granulepos = cur_granulepos; + } + last_video_granule_seek_pos = last_seek_pos; + } + if ((has_audio && !audio_catch) && page_serialno == vo.serialno) { + if (cur_granulepos >= target_audio_granulepos) { + audio_catch = true; + if (cur_audio_granulepos < 0) { + // Adding 1s helps catching the start of the page and avoids backtrack_time = 0. + backtrack_time = MAX(backtrack_time, 1 + vorbis_granule_time(&vd, cur_granulepos) - target_time); + } + } else { + audio_seek_pos = last_audio_granule_seek_pos; + cur_audio_granulepos = cur_granulepos; + } + last_audio_granule_seek_pos = last_seek_pos; + } + } + } + if (backtrack_time > 0) { + if (seek_pos <= stream_data_offset) { + break; + } + int64_t delta_seek = MAX(backtrack_time * stream_data_size / stream_length, min_seek); + seek_pos -= delta_seek; + } + video_catch = cur_video_granulepos != -1; + audio_catch = cur_audio_granulepos != -1; + } + + if (cur_video_granulepos < (1LL << ti.keyframe_granule_shift)) { + video_seek_pos = stream_data_offset; + cur_video_granulepos = 1LL << ti.keyframe_granule_shift; + } + if (has_audio) { + if (cur_audio_granulepos == -1) { + audio_seek_pos = stream_data_offset; + cur_audio_granulepos = 0; + } + seek_pos = MIN(video_seek_pos, audio_seek_pos); + } else { + seek_pos = video_seek_pos; + } + + return seek_pos; } -void VideoStreamPlaybackTheora::clear() { - if (file.is_null()) { - return; +void VideoStreamPlaybackTheora::video_write(th_ycbcr_buffer yuv) { + uint8_t *w = frame_data.ptrw(); + char *dst = (char *)w; + uint32_t y_offset = region.position.y * yuv[0].stride + region.position.x; + uint32_t uv_offset = region.position.y * yuv[1].stride + region.position.x; + + if (px_fmt == TH_PF_444) { + yuv444_2_rgb8888((uint8_t *)dst, (uint8_t *)yuv[0].data + y_offset, (uint8_t *)yuv[1].data + uv_offset, (uint8_t *)yuv[2].data + uv_offset, region.size.x, region.size.y, yuv[0].stride, yuv[1].stride, region.size.x << 2); + } else if (px_fmt == TH_PF_422) { + yuv422_2_rgb8888((uint8_t *)dst, (uint8_t *)yuv[0].data + y_offset, (uint8_t *)yuv[1].data + uv_offset, (uint8_t *)yuv[2].data + uv_offset, region.size.x, region.size.y, yuv[0].stride, yuv[1].stride, region.size.x << 2); + } else if (px_fmt == TH_PF_420) { + yuv420_2_rgb8888((uint8_t *)dst, (uint8_t *)yuv[0].data + y_offset, (uint8_t *)yuv[1].data + uv_offset, (uint8_t *)yuv[2].data + uv_offset, region.size.x, region.size.y, yuv[0].stride, yuv[1].stride, region.size.x << 2); } - if (vorbis_p) { - ogg_stream_clear(&vo); - if (vorbis_p >= 3) { - vorbis_block_clear(&vb); - vorbis_dsp_clear(&vd); - } + Ref img; + img.instantiate(region.size.x, region.size.y, false, Image::FORMAT_RGBA8, frame_data); //zero copy image creation + + texture->update(img); // Zero-copy send to rendering server. +} + +void VideoStreamPlaybackTheora::clear() { + if (!file.is_null()) { + file.unref(); + } + if (has_audio) { + vorbis_block_clear(&vb); + vorbis_dsp_clear(&vd); vorbis_comment_clear(&vc); vorbis_info_clear(&vi); - vorbis_p = 0; + ogg_stream_clear(&vo); + if (audio_buffer_size) { + memdelete_arr(audio_buffer); + } } - if (theora_p) { - ogg_stream_clear(&to); + if (has_video) { th_decode_free(td); th_comment_clear(&tc); th_info_clear(&ti); - theora_p = 0; - } - ogg_sync_clear(&oy); - -#ifdef THEORA_USE_THREAD_STREAMING - thread_exit = true; - thread_sem->post(); //just in case - thread.wait_to_finish(); - ring_buffer.clear(); -#endif - - theora_p = 0; - vorbis_p = 0; - videobuf_ready = 0; - frames_pending = 0; - videobuf_time = 0; - theora_eos = false; - vorbis_eos = false; + ogg_stream_clear(&to); + ogg_sync_clear(&oy); + } - file.unref(); + audio_buffer = nullptr; playing = false; -} - -void VideoStreamPlaybackTheora::set_file(const String &p_file) { - ERR_FAIL_COND(playing); - ogg_packet op; - th_setup_info *ts = nullptr; - - file_name = p_file; - file = FileAccess::open(p_file, FileAccess::READ); - ERR_FAIL_COND_MSG(file.is_null(), "Cannot open file '" + p_file + "'."); - -#ifdef THEORA_USE_THREAD_STREAMING - thread_exit = false; - thread_eof = false; - //pre-fill buffer - int to_read = ring_buffer.space_left(); - uint64_t read = file->get_buffer(read_buffer.ptr(), to_read); - ring_buffer.write(read_buffer.ptr(), read); - - thread.start(_streaming_thread, this); -#endif - - ogg_sync_init(&oy); - - /* init supporting Vorbis structures needed in header parsing */ - vorbis_info_init(&vi); - vorbis_comment_init(&vc); - - /* init supporting Theora structures needed in header parsing */ - th_comment_init(&tc); - th_info_init(&ti); - + has_video = false; + has_audio = false; theora_eos = false; vorbis_eos = false; +} - /* Ogg file open; parse the headers */ - /* Only interested in Vorbis/Theora streams */ +void VideoStreamPlaybackTheora::find_streams(th_setup_info *&ts) { + ogg_stream_state test; + ogg_packet op; + ogg_page og; int stateflag = 0; - int audio_track_skip = audio_track; + /* Only interested in Vorbis/Theora streams */ while (!stateflag) { int ret = buffer_data(); - if (ret == 0) { + if (!ret) { break; } while (ogg_sync_pageout(&oy, &og) > 0) { - ogg_stream_state test; - /* is this a mandated initial header? If not, stop parsing */ if (!ogg_page_bos(&og)) { /* don't leak the page; get it into the appropriate stream */ @@ -215,11 +308,11 @@ void VideoStreamPlaybackTheora::set_file(const String &p_file) { ogg_stream_packetout(&test, &op); /* identify the codec: try theora */ - if (!theora_p && th_decode_headerin(&ti, &tc, &ts, &op) >= 0) { + if (!has_video && th_decode_headerin(&ti, &tc, &ts, &op) >= 0) { /* it is theora */ memcpy(&to, &test, sizeof(test)); - theora_p = 1; - } else if (!vorbis_p && vorbis_synthesis_headerin(&vi, &vc, &op) >= 0) { + has_video = true; + } else if (!has_audio && vorbis_synthesis_headerin(&vi, &vc, &op) >= 0) { /* it is vorbis */ if (audio_track_skip) { vorbis_info_clear(&vi); @@ -227,138 +320,165 @@ void VideoStreamPlaybackTheora::set_file(const String &p_file) { ogg_stream_clear(&test); vorbis_info_init(&vi); vorbis_comment_init(&vc); - audio_track_skip--; } else { memcpy(&vo, &test, sizeof(test)); - vorbis_p = 1; + has_audio = true; } } else { /* whatever it is, we don't care about it */ ogg_stream_clear(&test); } } - /* fall through to non-bos page parsing */ } +} - /* we're expecting more header packets. */ - while ((theora_p && theora_p < 3) || (vorbis_p && vorbis_p < 3)) { - int ret = 0; +void VideoStreamPlaybackTheora::read_headers(th_setup_info *&ts) { + ogg_packet op; + int theora_header_packets = 1; + int vorbis_header_packets = 1; + /* we're expecting more header packets. */ + while (theora_header_packets < 3 || (has_audio && vorbis_header_packets < 3)) { /* look for further theora headers */ - if (theora_p && theora_p < 3) { - ret = ogg_stream_packetout(&to, &op); - } - while (theora_p && theora_p < 3 && ret) { - if (ret < 0) { - fprintf(stderr, "Error parsing Theora stream headers; corrupt stream?\n"); - clear(); - return; + // The API says there can be more than three but only three are mandatory. + while (theora_header_packets < 3 && ogg_stream_packetout(&to, &op) > 0) { + if (th_decode_headerin(&ti, &tc, &ts, &op) > 0) { + theora_header_packets++; } - if (!th_decode_headerin(&ti, &tc, &ts, &op)) { - fprintf(stderr, "Error parsing Theora stream headers; corrupt stream?\n"); - clear(); - return; - } - ret = ogg_stream_packetout(&to, &op); - theora_p++; } /* look for more vorbis header packets */ - if (vorbis_p && vorbis_p < 3) { - ret = ogg_stream_packetout(&vo, &op); - } - while (vorbis_p && vorbis_p < 3 && ret) { - if (ret < 0) { - fprintf(stderr, "Error parsing Vorbis stream headers; corrupt stream?\n"); - clear(); - return; - } - ret = vorbis_synthesis_headerin(&vi, &vc, &op); - if (ret) { - fprintf(stderr, "Error parsing Vorbis stream headers; corrupt stream?\n"); - clear(); - return; - } - vorbis_p++; - if (vorbis_p == 3) { - break; + while (has_audio && vorbis_header_packets < 3 && ogg_stream_packetout(&vo, &op) > 0) { + if (!vorbis_synthesis_headerin(&vi, &vc, &op)) { + vorbis_header_packets++; } - ret = ogg_stream_packetout(&vo, &op); } - /* The header pages/packets will arrive before anything else we - care about, or the stream is not obeying spec */ - - if (ogg_sync_pageout(&oy, &og) > 0) { - queue_page(&og); /* demux into the appropriate stream */ - } else { - int ret2 = buffer_data(); /* someone needs more data */ - if (ret2 == 0) { + /* The header pages/packets will arrive before anything else we care about, or the stream is not obeying spec */ + if (theora_header_packets < 3 || (has_audio && vorbis_header_packets < 3)) { + ogg_page page; + if (read_page(&page)) { + queue_page(&page); + } else { fprintf(stderr, "End of file while searching for codec headers.\n"); - clear(); - return; + break; } } } - /* And now we have it all. Initialize decoders. */ - if (theora_p) { - td = th_decode_alloc(&ti, ts); - px_fmt = ti.pixel_fmt; - switch (ti.pixel_fmt) { - case TH_PF_420: - //printf(" 4:2:0 video\n"); - break; - case TH_PF_422: - //printf(" 4:2:2 video\n"); - break; - case TH_PF_444: - //printf(" 4:4:4 video\n"); - break; - case TH_PF_RSVD: - default: - printf(" video\n (UNKNOWN Chroma sampling!)\n"); - break; + has_video = theora_header_packets == 3; + has_audio = vorbis_header_packets == 3; +} + +void VideoStreamPlaybackTheora::set_file(const String &p_file) { + ERR_FAIL_COND(playing); + th_setup_info *ts = nullptr; + + clear(); + + file = FileAccess::open(p_file, FileAccess::READ); + ERR_FAIL_COND_MSG(file.is_null(), "Cannot open file '" + p_file + "'."); + + file_name = p_file; + + ogg_sync_init(&oy); + + /* init supporting Vorbis structures needed in header parsing */ + vorbis_info_init(&vi); + vorbis_comment_init(&vc); + + /* init supporting Theora structures needed in header parsing */ + th_comment_init(&tc); + th_info_init(&ti); + + /* Zero stream state structs so they can be checked later. */ + memset(&to, 0, sizeof(to)); + memset(&vo, 0, sizeof(vo)); + + /* Ogg file open; parse the headers */ + find_streams(ts); + read_headers(ts); + + if (!has_audio) { + vorbis_comment_clear(&vc); + vorbis_info_clear(&vi); + if (!ogg_stream_check(&vo)) { + ogg_stream_clear(&vo); } - th_decode_ctl(td, TH_DECCTL_GET_PPLEVEL_MAX, &pp_level_max, - sizeof(pp_level_max)); - pp_level = 0; - th_decode_ctl(td, TH_DECCTL_SET_PPLEVEL, &pp_level, sizeof(pp_level)); - pp_inc = 0; - - int w; - int h; - w = ((ti.pic_x + ti.frame_width + 1) & ~1) - (ti.pic_x & ~1); - h = ((ti.pic_y + ti.frame_height + 1) & ~1) - (ti.pic_y & ~1); - size.x = w; - size.y = h; - - Ref img = Image::create_empty(w, h, false, Image::FORMAT_RGBA8); - texture->set_image(img); + } - } else { - /* tear down the partial theora setup */ - th_info_clear(&ti); + // One video stream is mandatory. + if (!has_video) { + th_setup_free(ts); th_comment_clear(&tc); + th_info_clear(&ti); + if (!ogg_stream_check(&to)) { + ogg_stream_clear(&to); + } + file.unref(); + return; } + /* And now we have it all. Initialize decoders. */ + td = th_decode_alloc(&ti, ts); th_setup_free(ts); + px_fmt = ti.pixel_fmt; + switch (ti.pixel_fmt) { + case TH_PF_420: + case TH_PF_422: + case TH_PF_444: + break; + default: + WARN_PRINT(" video\n (UNKNOWN Chroma sampling!)\n"); + break; + } + th_decode_ctl(td, TH_DECCTL_GET_PPLEVEL_MAX, &pp_level_max, sizeof(pp_level_max)); + pp_level = 0; + th_decode_ctl(td, TH_DECCTL_SET_PPLEVEL, &pp_level, sizeof(pp_level)); + pp_inc = 0; + + size.x = ti.frame_width; + size.y = ti.frame_height; + region.position.x = ti.pic_x; + region.position.y = ti.pic_y; + region.size.x = ti.pic_width; + region.size.y = ti.pic_height; - if (vorbis_p) { + Ref img = Image::create_empty(region.size.x, region.size.y, false, Image::FORMAT_RGBA8); + texture->set_image(img); + frame_data.resize(region.size.x * region.size.y * 4); + + frame_duration = (double)ti.fps_denominator / ti.fps_numerator; + + if (has_audio) { vorbis_synthesis_init(&vd, &vi); vorbis_block_init(&vd, &vb); - //_setup(vi.channels, vi.rate); - } else { - /* tear down the partial vorbis setup */ - vorbis_info_clear(&vi); - vorbis_comment_clear(&vc); + audio_buffer_size = MIN(vi.channels, 8) * 1024; + audio_buffer = memnew_arr(float, audio_buffer_size); } - playing = false; - buffering = true; - time = 0; - audio_frames_wrote = 0; + stream_data_offset = file->get_position() - oy.fill + oy.returned; + stream_data_size = file->get_length() - stream_data_offset; + + // Sync to last page to find video length. + int64_t seek_pos = MAX(stream_data_offset, (int64_t)file->get_length() - 64 * 1024); + int64_t video_granulepos = INT64_MAX; + int64_t audio_granulepos = INT64_MAX; + file->seek(seek_pos); + seek_pos = seek_streams(-1, video_granulepos, audio_granulepos); + file->seek(seek_pos); + ogg_sync_reset(&oy); + + stream_length = 0; + ogg_page page; + while (read_page(&page) > 0) { + // Use MAX because, even though pages are ordered, page time can be -1 + // for pages without full frames. Streams could be truncated too. + stream_length = MAX(stream_length, get_page_time(&page)); + } + + seek(0); } double VideoStreamPlaybackTheora::get_time() const { @@ -378,68 +498,45 @@ void VideoStreamPlaybackTheora::update(double p_delta) { } if (!playing || paused) { - //printf("not playing\n"); return; } -#ifdef THEORA_USE_THREAD_STREAMING - thread_sem->post(); -#endif - time += p_delta; - if (videobuf_time > get_time()) { - return; //no new frames need to be produced - } - - bool frame_done = false; - bool audio_done = !vorbis_p; - - while (!frame_done || (!audio_done && !vorbis_eos)) { - //a frame needs to be produced + double comp_time = get_time(); + bool audio_ready = false; + // Read data until we fill the audio buffer and get a new video frame. + while ((!audio_ready && !audio_done) || (!video_ready && !video_done)) { ogg_packet op; - bool no_theora = false; - bool buffer_full = false; - while (vorbis_p && !audio_done && !buffer_full) { - int ret; - float **pcm; + while (!audio_ready && !audio_done) { + // Send remaining frames. + if (!send_audio()) { + audio_ready = true; + break; + } - /* if there's pending, decoded audio, grab it */ - ret = vorbis_synthesis_pcmout(&vd, &pcm); + float **pcm; + int ret = vorbis_synthesis_pcmout(&vd, &pcm); if (ret > 0) { - const int AUXBUF_LEN = 4096; - int to_read = ret; - float aux_buffer[AUXBUF_LEN]; - - while (to_read) { - int m = MIN(AUXBUF_LEN / vi.channels, to_read); - + int frames_read = 0; + while (frames_read < ret) { + int m = MIN(audio_buffer_size / vi.channels, ret - frames_read); int count = 0; - for (int j = 0; j < m; j++) { for (int i = 0; i < vi.channels; i++) { - aux_buffer[count++] = pcm[i][j]; + audio_buffer[count++] = pcm[i][frames_read + j]; } } - - if (mix_callback) { - int mixed = mix_callback(mix_udata, aux_buffer, m); - to_read -= mixed; - if (mixed != m) { //could mix no more - buffer_full = true; - break; - } - } else { - to_read -= m; //just pretend we sent the audio + frames_read += m; + audio_ptr_end = m; + if (!send_audio()) { + audio_ready = true; + break; } } - - vorbis_synthesis_read(&vd, ret - to_read); - - audio_frames_wrote += ret - to_read; - + vorbis_synthesis_read(&vd, frames_read); } else { /* no pending audio; is there a pending packet to decode? */ if (ogg_stream_packetout(&vo, &op) > 0) { @@ -447,87 +544,45 @@ void VideoStreamPlaybackTheora::update(double p_delta) { vorbis_synthesis_blockin(&vd, &vb); } } else { /* we need more data; break out to suck in another page */ + audio_done = vorbis_eos; break; } } - - audio_done = videobuf_time < (audio_frames_wrote / float(vi.rate)); - - if (buffer_full) { - break; - } } - while (theora_p && !frame_done) { - /* theora is one in, one out... */ + while (!video_ready && !video_done) { if (ogg_stream_packetout(&to, &op) > 0) { - /*HACK: This should be set after a seek or a gap, but we might not have - a granulepos for the first packet (we only have them for the last - packet on a page), so we just set it as often as we get it. - To do this right, we should back-track from the last packet on the - page and compute the correct granulepos for the first packet after - a seek or a gap.*/ if (op.granulepos >= 0) { - th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &op.granulepos, - sizeof(op.granulepos)); + th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &op.granulepos, sizeof(op.granulepos)); } - ogg_int64_t videobuf_granulepos; - if (th_decode_packetin(td, &op, &videobuf_granulepos) == 0) { - videobuf_time = th_granule_time(td, videobuf_granulepos); - - //printf("frame time %f, play time %f, ready %i\n", (float)videobuf_time, get_time(), videobuf_ready); - - /* is it already too old to be useful? This is only actually - useful cosmetically after a SIGSTOP. Note that we have to - decode the frame even if we don't show it (for now) due to - keyframing. Soon enough libtheora will be able to deal - with non-keyframe seeks. */ - - if (videobuf_time >= get_time()) { - frame_done = true; + int64_t videobuf_granulepos; + int ret = th_decode_packetin(td, &op, &videobuf_granulepos); + if (ret == 0 || ret == TH_DUPFRAME) { + next_frame_time = th_granule_time(td, videobuf_granulepos); + if (next_frame_time > comp_time) { + dup_frame = (ret == TH_DUPFRAME); + video_ready = true; } else { /*If we are too slow, reduce the pp level.*/ pp_inc = pp_level > 0 ? -1 : 0; } } - - } else { - no_theora = true; + } else { /* we need more data; break out to suck in another page */ + video_done = theora_eos; break; } } -#ifdef THEORA_USE_THREAD_STREAMING - if (file.is_valid() && thread_eof && no_theora && theora_eos && ring_buffer.data_left() == 0) { -#else - if (file.is_valid() && /*!videobuf_ready && */ no_theora && theora_eos) { -#endif - //printf("video done, stopping\n"); - stop(); - return; - } - - if (!frame_done || !audio_done) { - //what's the point of waiting for audio to grab a page? - - buffer_data(); - while (ogg_sync_pageout(&oy, &og) > 0) { - queue_page(&og); + if (!video_ready || !audio_ready) { + int ret = feed_pages(); + if (ret == 0) { + vorbis_eos = true; + theora_eos = true; + break; } } - /* If playback has begun, top audio buffer off immediately. */ - //if(stateflag) audio_write_nonblocking(); - - /* are we at or past time for this video frame? */ - if (videobuf_ready && videobuf_time <= get_time()) { - //video_write(); - //videobuf_ready=0; - } else { - //printf("frame at %f not ready (time %f), ready %i\n", (float)videobuf_time, get_time(), videobuf_ready); - } - - double tdiff = videobuf_time - get_time(); + double tdiff = next_frame_time - comp_time; /*If we have lots of extra time, increase the post-processing level.*/ if (tdiff > ti.fps_denominator * 0.25 / ti.fps_numerator) { pp_inc = pp_level < pp_level_max ? 1 : 0; @@ -536,14 +591,27 @@ void VideoStreamPlaybackTheora::update(double p_delta) { } } - video_write(); + if (!video_ready && video_done && audio_done) { + stop(); + return; + } + + // Wait for the last frame to end before rendering the next one. + if (video_ready && comp_time >= current_frame_time) { + if (!dup_frame) { + th_ycbcr_buffer yuv; + th_decode_ycbcr_out(td, yuv); + video_write(yuv); + } + dup_frame = false; + video_ready = false; + current_frame_time = next_frame_time; + } } void VideoStreamPlaybackTheora::play() { - if (!playing) { - time = 0; - } else { - stop(); + if (playing) { + return; } playing = true; @@ -552,12 +620,8 @@ void VideoStreamPlaybackTheora::play() { } void VideoStreamPlaybackTheora::stop() { - if (playing) { - clear(); - set_file(file_name); //reset - } playing = false; - time = 0; + seek(0); } bool VideoStreamPlaybackTheora::is_playing() const { @@ -573,7 +637,7 @@ bool VideoStreamPlaybackTheora::is_paused() const { } double VideoStreamPlaybackTheora::get_length() const { - return 0; + return stream_length; } double VideoStreamPlaybackTheora::get_playback_position() const { @@ -581,7 +645,119 @@ double VideoStreamPlaybackTheora::get_playback_position() const { } void VideoStreamPlaybackTheora::seek(double p_time) { - WARN_PRINT_ONCE("Seeking in Theora videos is not implemented yet (it's only supported for GDExtension-provided video streams)."); + if (file.is_null()) { + return; + } + if (p_time >= stream_length) { + return; + } + + video_ready = false; + next_frame_time = 0; + current_frame_time = -1; + dup_frame = false; + video_done = false; + audio_done = !has_audio; + theora_eos = false; + vorbis_eos = false; + audio_ptr_start = 0; + audio_ptr_end = 0; + + ogg_stream_reset(&to); + if (has_audio) { + ogg_stream_reset(&vo); + vorbis_synthesis_restart(&vd); + } + + int64_t seek_pos; + int64_t video_granulepos; + int64_t audio_granulepos; + // Find the granules we need so we can start playing at the seek time. + seek_pos = seek_streams(p_time, video_granulepos, audio_granulepos); + if (seek_pos < 0) { + return; + } + file->seek(seek_pos); + ogg_sync_reset(&oy); + + time = p_time; + + double last_audio_time = 0; + double last_video_time = 0; + bool first_frame_decoded = false; + bool start_audio = false; + bool start_video = false; + bool keyframe_found = false; + uint64_t current_frame = 0; + // Read from the streams skipping pages until we reach the granules we want. We won't skip pages from both video and + // audio streams, only one of them, until decoding of both starts. + // video_granulepos and audio_granulepos are guaranteed to be found by checking the granulepos in the packets, no + // need to keep track of packets with granulepos == -1 until decoding starts. + while ((has_audio && last_audio_time < p_time) || (last_video_time <= p_time)) { + ogg_packet op; + if (feed_pages() == 0) { + break; + } + while (has_audio && last_audio_time < p_time && ogg_stream_packetout(&vo, &op) > 0) { + if (start_audio) { + if (vorbis_synthesis(&vb, &op) == 0) { /* test for success! */ + vorbis_synthesis_blockin(&vd, &vb); + float **pcm; + int samples_left = ceil((p_time - last_audio_time) * vi.rate); + int samples_read = vorbis_synthesis_pcmout(&vd, &pcm); + int samples_consumed = MIN(samples_left, samples_read); + vorbis_synthesis_read(&vd, samples_consumed); + last_audio_time += (double)samples_consumed / vi.rate; + } + } else if (op.granulepos >= audio_granulepos || audio_granulepos == 0) { + last_audio_time = vorbis_granule_time(&vd, op.granulepos); + // Start tracking audio now. This won't produce any samples but will update the decoder state. + if (vorbis_synthesis_trackonly(&vb, &op) == 0) { + vorbis_synthesis_blockin(&vd, &vb); + } + start_audio = true; + } + } + while (last_video_time <= p_time && ogg_stream_packetout(&to, &op) > 0) { + if (!start_video && (op.granulepos >= video_granulepos || video_granulepos == (1LL << ti.keyframe_granule_shift))) { + if (op.granulepos > 0) { + current_frame = th_granule_frame(td, op.granulepos); + } + start_video = true; + } + // Don't start decoding until a keyframe is found, but count frames. + if (start_video) { + if (!keyframe_found && th_packet_iskeyframe(&op)) { + keyframe_found = true; + int64_t cur_granulepos = (current_frame + 1) << ti.keyframe_granule_shift; + th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &cur_granulepos, sizeof(cur_granulepos)); + } + if (keyframe_found) { + int64_t videobuf_granulepos; + int ret = th_decode_packetin(td, &op, &videobuf_granulepos); + if (ret == 0 || ret == TH_DUPFRAME) { + last_video_time = th_granule_time(td, videobuf_granulepos); + first_frame_decoded = true; + } + } else { + current_frame++; + } + } + } + } + + if (first_frame_decoded) { + if (is_playing()) { + // Draw the current frame. + th_ycbcr_buffer yuv; + th_decode_ycbcr_out(td, yuv); + video_write(yuv); + current_frame_time = last_video_time; + } else { + next_frame_time = current_frame_time; + video_ready = true; + } + } } int VideoStreamPlaybackTheora::get_channels() const { @@ -596,44 +772,11 @@ int VideoStreamPlaybackTheora::get_mix_rate() const { return vi.rate; } -#ifdef THEORA_USE_THREAD_STREAMING - -void VideoStreamPlaybackTheora::_streaming_thread(void *ud) { - VideoStreamPlaybackTheora *vs = static_cast(ud); - - while (!vs->thread_exit) { - //just fill back the buffer - if (!vs->thread_eof) { - int to_read = vs->ring_buffer.space_left(); - if (to_read > 0) { - uint64_t read = vs->file->get_buffer(vs->read_buffer.ptr(), to_read); - vs->ring_buffer.write(vs->read_buffer.ptr(), read); - vs->thread_eof = vs->file->eof_reached(); - } - } - - vs->thread_sem->wait(); - } -} - -#endif - VideoStreamPlaybackTheora::VideoStreamPlaybackTheora() { texture.instantiate(); - -#ifdef THEORA_USE_THREAD_STREAMING - int rb_power = nearest_shift(RB_SIZE_KB * 1024); - ring_buffer.resize(rb_power); - read_buffer.resize(RB_SIZE_KB * 1024); - thread_sem = Semaphore::create(); - -#endif } VideoStreamPlaybackTheora::~VideoStreamPlaybackTheora() { -#ifdef THEORA_USE_THREAD_STREAMING - memdelete(thread_sem); -#endif clear(); } diff --git a/modules/theora/video_stream_theora.h b/modules/theora/video_stream_theora.h index 5e6630f1e2f..eec83c4dd92 100644 --- a/modules/theora/video_stream_theora.h +++ b/modules/theora/video_stream_theora.h @@ -41,34 +41,37 @@ class ImageTexture; -//#define THEORA_USE_THREAD_STREAMING - class VideoStreamPlaybackTheora : public VideoStreamPlayback { GDCLASS(VideoStreamPlaybackTheora, VideoStreamPlayback); - enum { - MAX_FRAMES = 4, - }; - - //Image frames[MAX_FRAMES]; Image::Format format = Image::Format::FORMAT_L8; Vector frame_data; int frames_pending = 0; Ref file; String file_name; - int audio_frames_wrote = 0; Point2i size; + Rect2i region; + + float *audio_buffer = nullptr; + int audio_buffer_size; + int audio_ptr_start; + int audio_ptr_end; int buffer_data(); int queue_page(ogg_page *page); - void video_write(); + int read_page(ogg_page *page); + int feed_pages(); + double get_page_time(ogg_page *page); + int64_t seek_streams(double p_time, int64_t &video_granulepos, int64_t &audio_granulepos); + void find_streams(th_setup_info *&ts); + void read_headers(th_setup_info *&ts); + void video_write(th_ycbcr_buffer yuv); double get_time() const; bool theora_eos = false; bool vorbis_eos = false; ogg_sync_state oy; - ogg_page og; ogg_stream_state vo; ogg_stream_state to; th_info ti; @@ -79,48 +82,51 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback { vorbis_block vb; vorbis_comment vc; th_pixel_fmt px_fmt; - double videobuf_time = 0; - int pp_inc = 0; + double frame_duration; + double stream_length; + int64_t stream_data_offset; + int64_t stream_data_size; - int theora_p = 0; - int vorbis_p = 0; int pp_level_max = 0; int pp_level = 0; - int videobuf_ready = 0; + int pp_inc = 0; bool playing = false; - bool buffering = false; + bool paused = false; + + bool dup_frame = false; + bool has_video = false; + bool has_audio = false; + bool video_ready = false; + bool video_done = false; + bool audio_done = false; - double last_update_time = 0; double time = 0; + double next_frame_time = 0; + double current_frame_time = 0; double delay_compensation = 0; Ref texture; - bool paused = false; - -#ifdef THEORA_USE_THREAD_STREAMING - - enum { - RB_SIZE_KB = 1024 - }; - - RingBuffer ring_buffer; - Vector read_buffer; - bool thread_eof = false; - Semaphore *thread_sem = nullptr; - Thread thread; - SafeFlag thread_exit; - - static void _streaming_thread(void *ud); - -#endif - int audio_track = 0; protected: void clear(); + _FORCE_INLINE_ bool send_audio() { + if (audio_ptr_end > 0) { + int mixed = mix_callback(mix_udata, &audio_buffer[audio_ptr_start * vi.channels], audio_ptr_end - audio_ptr_start); + audio_ptr_start += mixed; + if (audio_ptr_start == audio_ptr_end) { + audio_ptr_start = 0; + audio_ptr_end = 0; + } else { + return false; + } + } + return true; + } + public: virtual void play() override; virtual void stop() override; diff --git a/scene/gui/video_stream_player.cpp b/scene/gui/video_stream_player.cpp index 1a4bace9a50..e7b8bc405c1 100644 --- a/scene/gui/video_stream_player.cpp +++ b/scene/gui/video_stream_player.cpp @@ -158,6 +158,7 @@ void VideoStreamPlayer::_notification(int p_notification) { playback->update(delta); // playback->is_playing() returns false in the last video frame if (!playback->is_playing()) { + resampler.flush(); if (loop) { play(); return; @@ -305,7 +306,6 @@ void VideoStreamPlayer::play() { if (playback.is_null()) { return; } - playback->stop(); playback->play(); set_process_internal(true); last_audio_time = 0; @@ -434,7 +434,9 @@ double VideoStreamPlayer::get_stream_position() const { void VideoStreamPlayer::set_stream_position(double p_position) { if (playback.is_valid()) { + resampler.flush(); playback->seek(p_position); + last_audio_time = 0; } } diff --git a/servers/audio/audio_rb_resampler.cpp b/servers/audio/audio_rb_resampler.cpp index 94c3f0dd36c..4e05474e07f 100644 --- a/servers/audio/audio_rb_resampler.cpp +++ b/servers/audio/audio_rb_resampler.cpp @@ -76,22 +76,37 @@ uint32_t AudioRBResampler::_resample(AudioFrame *p_dest, int p_todo, int32_t p_i } // This will probably never be used, but added anyway + // Downmix to stereo. Apply -3dB to center, and sides, -6dB to rear. + + // four channels - channel order: front left, front right, rear left, rear right if constexpr (C == 4) { - float v0 = rb[(pos << 2) + 0]; - float v1 = rb[(pos << 2) + 1]; - float v0n = rb[(pos_next << 2) + 0]; - float v1n = rb[(pos_next << 2) + 1]; + float v0 = rb[(pos << 2) + 0] + rb[(pos << 2) + 2] / 2; + float v1 = rb[(pos << 2) + 1] + rb[(pos << 2) + 3] / 2; + float v0n = rb[(pos_next << 2) + 0] + rb[(pos_next << 2) + 2] / 2; + float v1n = rb[(pos_next << 2) + 1] + rb[(pos_next << 2) + 3] / 2; v0 += (v0n - v0) * frac; v1 += (v1n - v1) * frac; p_dest[i] = AudioFrame(v0, v1); } + // six channels - channel order: front left, center, front right, rear left, rear right, LFE if constexpr (C == 6) { - float v0 = rb[(pos * 6) + 0]; - float v1 = rb[(pos * 6) + 1]; - float v0n = rb[(pos_next * 6) + 0]; - float v1n = rb[(pos_next * 6) + 1]; + float v0 = rb[(pos * 6) + 0] + rb[(pos * 6) + 1] / Math_SQRT2 + rb[(pos * 6) + 3] / 2; + float v1 = rb[(pos * 6) + 2] + rb[(pos * 6) + 1] / Math_SQRT2 + rb[(pos * 6) + 4] / 2; + float v0n = rb[(pos_next * 6) + 0] + rb[(pos_next * 6) + 1] / Math_SQRT2 + rb[(pos_next * 6) + 3] / 2; + float v1n = rb[(pos_next * 6) + 2] + rb[(pos_next * 6) + 1] / Math_SQRT2 + rb[(pos_next * 6) + 4] / 2; + v0 += (v0n - v0) * frac; + v1 += (v1n - v1) * frac; + p_dest[i] = AudioFrame(v0, v1); + } + // eight channels - channel order: front left, center, front right, side left, side right, rear left, rear + // right, LFE + if constexpr (C == 8) { + float v0 = rb[(pos << 3) + 0] + rb[(pos << 3) + 1] / Math_SQRT2 + rb[(pos << 3) + 3] / Math_SQRT2 + rb[(pos << 3) + 5] / 2; + float v1 = rb[(pos << 3) + 2] + rb[(pos << 3) + 1] / Math_SQRT2 + rb[(pos << 3) + 4] / Math_SQRT2 + rb[(pos << 3) + 6] / 2; + float v0n = rb[(pos_next << 3) + 0] + rb[(pos_next << 3) + 1] / Math_SQRT2 + rb[(pos_next << 3) + 3] / Math_SQRT2 + rb[(pos_next << 3) + 5] / 2; + float v1n = rb[(pos_next << 3) + 2] + rb[(pos_next << 3) + 1] / Math_SQRT2 + rb[(pos_next << 3) + 4] / Math_SQRT2 + rb[(pos_next << 3) + 6] / 2; v0 += (v0n - v0) * frac; v1 += (v1n - v1) * frac; p_dest[i] = AudioFrame(v0, v1); @@ -125,6 +140,9 @@ bool AudioRBResampler::mix(AudioFrame *p_dest, int p_frames) { case 6: src_read = _resample<6>(p_dest, target_todo, increment); break; + case 8: + src_read = _resample<8>(p_dest, target_todo, increment); + break; } if (src_read > read_space) { @@ -159,7 +177,7 @@ int AudioRBResampler::get_num_of_ready_frames() { } Error AudioRBResampler::setup(int p_channels, int p_src_mix_rate, int p_target_mix_rate, int p_buffer_msec, int p_minbuff_needed) { - ERR_FAIL_COND_V(p_channels != 1 && p_channels != 2 && p_channels != 4 && p_channels != 6, ERR_INVALID_PARAMETER); + ERR_FAIL_COND_V(p_channels != 1 && p_channels != 2 && p_channels != 4 && p_channels != 6 && p_channels != 8, ERR_INVALID_PARAMETER); int desired_rb_bits = nearest_shift(MAX((p_buffer_msec / 1000.0) * p_src_mix_rate, p_minbuff_needed)); diff --git a/servers/audio/audio_rb_resampler.h b/servers/audio/audio_rb_resampler.h index 18ed1bb738e..af55e56ba54 100644 --- a/servers/audio/audio_rb_resampler.h +++ b/servers/audio/audio_rb_resampler.h @@ -86,7 +86,7 @@ struct AudioRBResampler { } else if (w < r) { space = r - w - 1; } else { - space = (rb_len - r) + w - 1; + space = (rb_len - w) + (r - 1); } return space; @@ -153,6 +153,19 @@ struct AudioRBResampler { wp = (wp + 1) & rb_mask; } } break; + case 8: { + for (uint32_t i = 0; i < p_frames; i++) { + rb[(wp << 3) + 0] = read_buf[(i << 3) + 0]; + rb[(wp << 3) + 1] = read_buf[(i << 3) + 1]; + rb[(wp << 3) + 2] = read_buf[(i << 3) + 2]; + rb[(wp << 3) + 3] = read_buf[(i << 3) + 3]; + rb[(wp << 3) + 4] = read_buf[(i << 3) + 4]; + rb[(wp << 3) + 5] = read_buf[(i << 3) + 5]; + rb[(wp << 3) + 6] = read_buf[(i << 3) + 6]; + rb[(wp << 3) + 7] = read_buf[(i << 3) + 7]; + wp = (wp + 1) & rb_mask; + } + } break; } rb_write_pos.set(wp);