support for ffmpeg filters (#16)

* filtering system w/ realtime PiP test-example
* emit events
* comment more
* test-example for overlaying text from ImageMagick using a filter
* reformat
* support audio filtering
* test the error handling
* manually close the muxer on filter error
* clean up the debug
* update the changelog
mmomtchev · Jan 1, 2024 · de46990 · de46990
1 parent f00e110
commit de46990
Show file tree

Hide file tree

Showing 19 changed files with 901 additions and 164 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -5,5 +5,6 @@
   "clang-format.fallbackStyle": "LLVM",
   "[cpp]": {
     "editor.defaultFormatter": "xaver.clang-format"
-  }
+  },
+  "editor.tabSize": 2
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## []
+ - Add `streams/Filter` to support ffmpeg filters
  - Support piping from a `ReadStream` to a `Demuxer`
  - Support piping from a `Muxer` to a `WriteStream`
  - Send `error` events on `Demuxer` and `Muxer`

diff --git a/deps/avcpp b/deps/avcpp
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -60,6 +60,7 @@
     "eslint-plugin-mocha": "^10.2.0",
     "magickwand.js": "^1.0.0-beta.6",
     "mocha": "^10.2.0",
+    "readable-stream-clone": "^0.0.7",
     "ts-node": "^10.9.1",
     "tsconfig-paths": "^4.2.0",
     "typescript": "^5.3.3"

diff --git a/src/binding/avcpp-frame.cc b/src/binding/avcpp-frame.cc
@@ -9,7 +9,27 @@ VideoFrame CreateVideoFrame(Nobind::Typemap::Buffer buffer, PixelFormat pixelFor
   return VideoFrame{buffer.first, buffer.second, pixelFormat, width, height};
 }
 
-VideoFrameBuffer CopyFrameToBuffer(av::VideoFrame &frame) {
+VideoFrameBuffer CopyFrameToBuffer(VideoFrame &frame) {
   auto size = frame.bufferSize();
   return VideoFrameBuffer{{[&frame, size](uint8_t *data) { frame.copyToBuffer(data, size); }, size}};
 }
+
+// Gets a video frame from the sink: returns a caller-owned heap frame, or nullptr when getVideoFrame() returns false
+VideoFrame *GetVideoFrame(BufferSinkFilterContext &sink, OptionalErrorCode ec) {
+  // A local object is filled first so that nothing is leaked should getVideoFrame() throw
+  VideoFrame frame;
+  if (!sink.getVideoFrame(frame, ec)) {
+    return nullptr;
+  }
+  return new VideoFrame(std::move(frame)); // hand the received frame over to the heap
+}
+
+// Gets audio samples from the sink: returns caller-owned heap samples, or nullptr when getAudioFrame() returns false
+AudioSamples *GetAudioFrame(BufferSinkFilterContext &sink, OptionalErrorCode ec) {
+  // A local object is filled first so that nothing is leaked should getAudioFrame() throw
+  AudioSamples samples;
+  if (!sink.getAudioFrame(samples, ec)) {
+    return nullptr;
+  }
+  return new AudioSamples(std::move(samples)); // hand the received samples over to the heap
+}
diff --git a/src/binding/avcpp-frame.h b/src/binding/avcpp-frame.h
@@ -1,4 +1,5 @@
 #pragma once
+#include <filters/buffersink.h>
 #include <frame.h>
 #include <functional>
 #include <nooverrides.h>
@@ -63,3 +64,9 @@ AudioSamples CreateAudioSamples(Nobind::Typemap::Buffer buffer, SampleFormat sam
                                 uint64_t channelLayout, int sampleRate);
 
 VideoFrame CreateVideoFrame(Nobind::Typemap::Buffer buffer, PixelFormat pixelFormat, int width, int height);
+
+// These extension functions wrap their avcpp counterparts, which return the data through an output argument.
+// They return pointers to avoid an unnecessary copy of the frame, which costs nothing on the JavaScript side:
+// in JavaScript, every C++ object is a heap-allocated object referenced by a pointer.
+VideoFrame *GetVideoFrame(BufferSinkFilterContext &sink, OptionalErrorCode ec);
+AudioSamples *GetAudioFrame(BufferSinkFilterContext &sink, OptionalErrorCode ec);
diff --git a/src/binding/avcpp-nobind.cc b/src/binding/avcpp-nobind.cc
@@ -2,15 +2,15 @@
 #include <av.h>
 #include <avutils.h>
 #include <codec.h>
-#include <ffmpeg.h>
-#include <packet.h>
-#include <videorescaler.h>
-
-// API2
-#include <codec.h>
 #include <codeccontext.h>
+#include <ffmpeg.h>
+#include <filters/buffersink.h>
+#include <filters/buffersrc.h>
+#include <filters/filtergraph.h>
 #include <format.h>
 #include <formatcontext.h>
+#include <packet.h>
+#include <videorescaler.h>
 
 #include <nobind.h>
 
@@ -26,6 +26,9 @@ using namespace av;
 #define REGISTER_CONSTANT(CONST, NAME)                                                                                 \
   constexpr static int64_t __const_##CONST{static_cast<int64_t>(CONST)};                                               \
   m.def<&__const_##CONST, Nobind::ReadOnly>(NAME);
+#define REGISTER_ENUM(ENUM, ID) /* exports ENUM::ID as a read-only module constant named "ENUM_ID" */                  \
+  constexpr static int64_t __const_##ID{static_cast<int64_t>(ENUM::ID)};                                               \
+  m.def<&__const_##ID, Nobind::ReadOnly>(#ENUM "_" #ID); // stringified name, e.g. "FilterMediaType_Video"
 
 // An universal toString() wrapper, to be used as a class extension
 template <typename T> std::string ToString(T &v) {
@@ -296,7 +299,9 @@ NOBIND_MODULE_DATA(ffmpeg, m, ffmpegInstanceData) {
       .cons<const char *>()
       .cons<const ChannelLayoutView &>()
       .def<&ChannelLayout::channels>("channels")
-      .def<&ChannelLayout::layout>("layout");
+      .def<&ChannelLayout::layout>("layout")
+      .def<&ChannelLayout::isValid>("isValid")
+      .def<static_cast<std::string (ChannelLayoutView::*)() const>(&ChannelLayoutView::describe)>("toString");
 
   m.def<ChannelLayoutView>("ChannelLayoutView");
 
@@ -327,6 +332,8 @@ NOBIND_MODULE_DATA(ffmpeg, m, ffmpegInstanceData) {
       .def<&Packet::timeBase, Nobind::ReturnNested>("timeBase");
 
   m.def<VideoFrame>("VideoFrame")
+      .cons()
+      .def<&VideoFrame::null>("null")
       // Every global function can also be registered as a static class method
       .def<&CreateVideoFrame>("create")
       .def<&VideoFrame::isNull>("isNull")
@@ -354,6 +361,7 @@ NOBIND_MODULE_DATA(ffmpeg, m, ffmpegInstanceData) {
       .ext<static_cast<ToString_t<VideoFrame>>(&ToString<VideoFrame>)>("toString");
 
   m.def<AudioSamples>("AudioSamples")
+      .def<&AudioSamples::null>("null")
       .def<&CreateAudioSamples>("create")
       .def<&AudioSamples::isNull>("isNull")
       .def<&AudioSamples::isComplete>("isComplete")
@@ -417,6 +425,45 @@ NOBIND_MODULE_DATA(ffmpeg, m, ffmpegInstanceData) {
       .def<static_cast<AudioSamples (AudioResampler::*)(size_t, OptionalErrorCode)>(&AudioResampler::pop),
            Nobind::ReturnAsync>("popAsync");
 
+  m.def<Filter>("Filter").cons<const char *>();
+
+  m.def<FilterGraph>("FilterGraph")
+      .cons<>()
+      .def<&FilterGraph::createFilter>("createFilter")
+      .def<static_cast<void (FilterGraph::*)(const std::string &, OptionalErrorCode)>(&FilterGraph::parse)>("parse")
+      .def<&FilterGraph::config>("config")
+      .def<static_cast<FilterContext (FilterGraph::*)(const std::string &, OptionalErrorCode)>(&FilterGraph::filter)>(
+          "filter");
+
+  m.def<FilterContext>("FilterContext");
+
+  // We only export the safer API that copies frames for now
+  m.def<BufferSrcFilterContext>("BufferSrcFilterContext")
+      .cons<FilterContext &>()
+      .def<static_cast<void (BufferSrcFilterContext::*)(const VideoFrame &, OptionalErrorCode)>(
+          &BufferSrcFilterContext::writeVideoFrame)>("writeVideoFrame")
+      .def<static_cast<void (BufferSrcFilterContext::*)(const AudioSamples &, OptionalErrorCode)>(
+          &BufferSrcFilterContext::writeAudioSamples)>("writeAudioSamples")
+      .def<static_cast<void (BufferSrcFilterContext::*)(const VideoFrame &, OptionalErrorCode)>(
+               &BufferSrcFilterContext::writeVideoFrame),
+           Nobind::ReturnAsync>("writeVideoFrameAsync")
+      .def<static_cast<void (BufferSrcFilterContext::*)(const AudioSamples &, OptionalErrorCode)>(
+               &BufferSrcFilterContext::writeAudioSamples),
+           Nobind::ReturnAsync>("writeAudioSamplesAsync")
+      .def<&BufferSrcFilterContext::checkFilter>("checkFilter");
+
+  m.def<BufferSinkFilterContext>("BufferSinkFilterContext")
+      .cons<FilterContext &>()
+      .ext<&GetVideoFrame, Nobind::ReturnNullAccept>("getVideoFrame")
+      .ext<&GetAudioFrame, Nobind::ReturnNullAccept>("getAudioFrame")
+      .def<&BufferSinkFilterContext::setFrameSize>("setFrameSize")
+      .def<&BufferSinkFilterContext::frameRate>("frameRate")
+      .def<&BufferSinkFilterContext::checkFilter>("checkFilter");
+
+  REGISTER_ENUM(FilterMediaType, Unknown);
+  REGISTER_ENUM(FilterMediaType, Audio);
+  REGISTER_ENUM(FilterMediaType, Video);
+
   m.Exports().Set("WritableCustomIO", WritableCustomIO::GetClass(m.Env()));
   m.Exports().Set("ReadableCustomIO", ReadableCustomIO::GetClass(m.Env()));
 

diff --git a/src/binding/avcpp-types.h b/src/binding/avcpp-types.h
@@ -54,6 +54,7 @@ TYPEMAPS_FOR_ENUM(AVCodecID);
 TYPEMAPS_FOR_ENUM(AVMediaType);
 TYPEMAPS_FOR_ENUM(AVPixelFormat);
 TYPEMAPS_FOR_ENUM(AVSampleFormat);
+TYPEMAPS_FOR_ENUM(FilterMediaType);
 // While this is not an enum, the typemap is still compatible
 TYPEMAPS_FOR_ENUM(std::bitset<64>);
 

diff --git a/src/binding/constants b/src/binding/constants
@@ -1009,3 +1009,5 @@ REGISTER_CONSTANT(SWS_SPLINE, "SWS_SPLINE");
 REGISTER_CONSTANT(SWS_SRC_V_CHR_DROP_MASK, "SWS_SRC_V_CHR_DROP_MASK");
 REGISTER_CONSTANT(SWS_SRC_V_CHR_DROP_SHIFT, "SWS_SRC_V_CHR_DROP_SHIFT");
 REGISTER_CONSTANT(SWS_X, "SWS_X");
+REGISTER_CONSTANT(AV_BUFFERSINK_FLAG_NO_REQUEST, "AV_BUFFERSINK_FLAG_NO_REQUEST");
+REGISTER_CONSTANT(AV_BUFFERSINK_FLAG_PEEK, "AV_BUFFERSINK_FLAG_PEEK");
diff --git a/src/binding/gen_constants.sh b/src/binding/gen_constants.sh
@@ -20,4 +20,5 @@ sed -nr 's/^.*\s+AVFMT_([_A-Z0-9]+)[, ].*/AVFMT_\1 AV_FMT_\1/p' ${FFMPEG}/src/li
 sed -nr 's/^.*\s+AV_LOG_([_A-Z0-9]+)[, ].*/AV_LOG_\1 AV_LOG_\1/p' ${FFMPEG}/src/libavutil/log.h | sort | uniq
 sed -nr 's/^.*\s+SWS_([_A-Z0-9]+)[, ].*/SWS_\1 SWS_\1/p' ${FFMPEG}/src/libswscale/swscale.h | sort | uniq
 sed -nr 's/^.*\s+SWS_([_A-Z0-9]+)[, ].*/SWS_\1 SWS_\1/p' ${FFMPEG}/src/libswresample/swresample.h | sort | uniq
+sed -nr 's/^.*\s+AV_BUFFERSINK_FLAG_([_A-Z0-9]+)[, ].*/AV_BUFFERSINK_FLAG_\1 AV_BUFFERSINK_FLAG_\1/p' ${FFMPEG}/src/libavfilter/buffersink.h | sort | uniq
 ) | sed -r 's/(.*)\s(.*)/REGISTER_CONSTANT(\1, "\2");/g'
diff --git a/src/lib/AudioDecoder.ts b/src/lib/AudioDecoder.ts
@@ -75,7 +75,8 @@ export class AudioDecoder extends MediaTransform implements MediaStream {
       sampleFormat: this.decoder.sampleFormat(),
       sampleRate: this.decoder.sampleRate(),
       channelLayout: new ffmpeg.ChannelLayout(this.decoder.channelLayout()),
-      frameSize: this.decoder.frameSize()
+      frameSize: this.decoder.frameSize(),
+      timeBase: this.decoder.timeBase()
     } as AudioStreamDefinition;
   }
 }
+235 −0		example/api2-samples/api2-decode-filter.cpp
+389 −0		example/api2-samples/api2-decode-overlay-encode.cpp
+327 −236		src/audioresampler.cpp
+6 −2		src/filters/buffersink.cpp
+23 −0		src/filters/filtergraph.cpp
+2 −0		src/filters/filtergraph.h