Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge AI Video commits into master #415

Merged
merged 5 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added data/audio.mp3
Binary file not shown.
Binary file added data/audio.ogg
Binary file not shown.
6 changes: 1 addition & 5 deletions ffmpeg/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,7 @@ func TestTranscoderAPI_InvalidFile(t *testing.T) {
// fail # 1
in.Fname = "none"
_, err := tc.Transcode(in, out)
if err == nil || err.Error() != "TranscoderInvalidVideo" {
// Early codec check didn't find video in missing input file so we get `TranscoderInvalidVideo`
// instead of `No such file or directory`
t.Error("Expected 'TranscoderInvalidVideo', got ", err)
}
require.Error(t, err, "No such file or directory")

// success # 1
in.Fname = "../transcoder/test.ts"
Expand Down
8 changes: 6 additions & 2 deletions ffmpeg/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,13 @@ int open_input(input_params *params, struct input_ctx *ctx)

ctx->transmuxing = params->transmuxing;

// open demuxer
ret = avformat_open_input(&ic, inp, NULL, NULL);
// open demuxer, passing along any caller-supplied demuxer options
AVDictionary **demuxer_opts = NULL;
if (params->demuxer.opts) demuxer_opts = &params->demuxer.opts;
ret = avformat_open_input(&ic, inp, NULL, demuxer_opts);
if (ret < 0) LPMS_ERR(open_input_err, "demuxer: Unable to open input");
// If avformat_open_input replaced the options AVDictionary with options that were not found free it
if (demuxer_opts) av_dict_free(demuxer_opts);
ctx->ic = ic;
ret = avformat_find_stream_info(ic, NULL);
if (ret < 0) LPMS_ERR(open_input_err, "Unable to find input info");
Expand Down
8 changes: 8 additions & 0 deletions ffmpeg/extras.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,13 @@ int lpms_get_codec_info(char *fname, pcodec_info out)
// instead of returning -1
ret = GET_CODEC_STREAMS_MISSING;
}
if (ic->duration != AV_NOPTS_VALUE) {
out->dur = ic->duration / AV_TIME_BASE;
}
// Return
if (ic->iformat && ic->iformat->name) {
strncpy(out->format_name, ic->iformat->name, MIN(strlen(out->format_name), strlen(ic->iformat->name)) + 1);
}
if (video_present && vc->name) {
strncpy(out->video_codec, vc->name, MIN(strlen(out->video_codec), strlen(vc->name))+1);
// If video track is present extract pixel format info
Expand All @@ -176,12 +182,14 @@ int lpms_get_codec_info(char *fname, pcodec_info out)
}
out->width = ic->streams[vstream]->codecpar->width;
out->height = ic->streams[vstream]->codecpar->height;
out->fps = av_q2d(ic->streams[vstream]->r_frame_rate);
} else {
// Indicate failure to extract video codec from given container
out->video_codec[0] = 0;
}
if (audio_present && ac->name) {
strncpy(out->audio_codec, ac->name, MIN(strlen(out->audio_codec), strlen(ac->name))+1);
out->audio_bit_rate = ic->streams[astream]->codecpar->bit_rate;
} else {
// Indicate failure to extract audio codec from given container
out->audio_codec[0] = 0;
Expand Down
4 changes: 4 additions & 0 deletions ffmpeg/extras.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@
#define _LPMS_EXTRAS_H_

// Media information reported by lpms_get_codec_info().
// The three string fields are caller-allocated buffers: lpms_get_codec_info()
// bounds each strncpy by the strlen() of the buffer's current contents, so the
// caller must pre-fill them with a NUL-terminated placeholder of sufficient length.
typedef struct s_codec_info {
// Container format name, copied from ic->iformat->name when available.
char * format_name;
// Video codec name; set to an empty string when no video codec could be extracted.
char * video_codec;
// Audio codec name; set to an empty string when no audio codec could be extracted.
char * audio_codec;
// Bit rate of the audio stream, taken from codecpar->bit_rate; only written when audio is present.
// NOTE(review): FFmpeg declares AVCodecParameters.bit_rate as int64_t, so this assignment narrows to int.
int audio_bit_rate;
// Pixel format of the video track; only filled in when a video track is present.
int pixel_format;
// Dimensions of the video stream, taken from its codecpar.
int width;
int height;
// Video frame rate, av_q2d() of the video stream's r_frame_rate; only written when video is present.
double fps;
// Duration in seconds (ic->duration / AV_TIME_BASE); left untouched when the
// container duration is AV_NOPTS_VALUE.
double dur;
} codec_info, *pcodec_info;

int lpms_rtmp2hls(char *listen, char *outf, char *ts_tmpl, char *seg_time, char *seg_start);
Expand Down
77 changes: 74 additions & 3 deletions ffmpeg/ffmpeg.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ type TranscodeOptionsIn struct {
Accel Acceleration
Device string
Transmuxing bool
Profile VideoProfile
}

type TranscodeOptions struct {
Expand Down Expand Up @@ -241,9 +242,13 @@ const (
)

// MediaFormatInfo describes the container and the audio/video streams of a
// media input, as populated by GetCodecInfo and GetCodecInfoBytes.
type MediaFormatInfo struct {
// Format is the container format name, e.g. "mpegts" or "matroska,webm".
Format string
// Acodec and Vcodec are the audio and video codec names.
Acodec, Vcodec string
// PixFormat is the pixel format of the video track.
PixFormat PixelFormat
// Width and Height are the dimensions of the video stream.
Width, Height int
// FPS is the video frame rate; it stays zero for inputs without video.
FPS float32
// DurSecs is the duration in whole seconds; zero means no duration was
// reported (GetCodecInfoBytes then estimates it for some audio formats).
DurSecs int64
// AudioBitrate is the audio stream bit rate in bits per second.
AudioBitrate int
}

func (f *MediaFormatInfo) ScaledHeight(width int) int {
Expand All @@ -258,15 +263,21 @@ func GetCodecInfo(fname string) (CodecStatus, MediaFormatInfo, error) {
format := MediaFormatInfo{}
cfname := C.CString(fname)
defer C.free(unsafe.Pointer(cfname))
fmtname := C.CString(strings.Repeat("0", 255))
acodec_c := C.CString(strings.Repeat("0", 255))
vcodec_c := C.CString(strings.Repeat("0", 255))
defer C.free(unsafe.Pointer(fmtname))
defer C.free(unsafe.Pointer(acodec_c))
defer C.free(unsafe.Pointer(vcodec_c))
var params_c C.codec_info
params_c.format_name = fmtname
params_c.video_codec = vcodec_c
params_c.audio_codec = acodec_c
params_c.pixel_format = C.AV_PIX_FMT_NONE
status := CodecStatus(C.lpms_get_codec_info(cfname, &params_c))
if C.strlen(fmtname) < 255 {
format.Format = C.GoString(fmtname)
}
if C.strlen(acodec_c) < 255 {
format.Acodec = C.GoString(acodec_c)
}
Expand All @@ -276,6 +287,9 @@ func GetCodecInfo(fname string) (CodecStatus, MediaFormatInfo, error) {
format.PixFormat = PixelFormat{int(params_c.pixel_format)}
format.Width = int(params_c.width)
format.Height = int(params_c.height)
format.FPS = float32(params_c.fps)
format.DurSecs = int64(params_c.dur)
format.AudioBitrate = int(params_c.audio_bit_rate)
return status, format, nil
}

Expand All @@ -295,6 +309,19 @@ func GetCodecInfoBytes(data []byte) (CodecStatus, MediaFormatInfo, error) {
}
fname := fmt.Sprintf("pipe:%d", or.Fd())
status, format, err = GetCodecInfo(fname)

// estimate duration from bitrate and filesize for audio
// some formats do not have built-in track duration metadata,
// and pipes do not have a filesize on their own which breaks ffmpeg's own
// duration estimates. So do the estimation calculation ourselves
// NB : mpegts has the same problem but may contain video so let's not handle that
// some other formats, eg ogg, show zero bitrate
//
// ffmpeg estimation of duration from bitrate:
// https://github.com/FFmpeg/FFmpeg/blob/8280ec7a3213c9b7bad88aac3695be2dedd2c00b/libavformat/demux.c#L1798
if format.DurSecs == 0 && format.AudioBitrate > 0 && (format.Format == "mp3" || format.Format == "wav" || format.Format == "aac") {
format.DurSecs = int64(len(data) * 8 / format.AudioBitrate)
}
return status, format, err
}

Expand Down Expand Up @@ -649,6 +676,11 @@ func createCOutputParams(input *TranscodeOptionsIn, ps []TranscodeOptions) ([]C.
// needed for hw dec -> hw rescale -> sw enc
filters = filters + ",hwdownload,format=nv12"
}
if p.Accel == Nvidia && filepath.Ext(input.Fname) == ".png" {
// If the input is PNG image(s) and we are scaling on a Nvidia device
// we need to first convert to a pixel format that the scale_npp filter supports
filters = "format=nv12," + filters
}
// set FPS denominator to 1 if unset by user
if param.FramerateDen == 0 {
param.FramerateDen = 1
Expand Down Expand Up @@ -850,6 +882,19 @@ func destroyCOutputParams(params []C.output_params) {
}
}

// hasVideoMetadata reports whether fname refers to an input whose metadata
// can be probed before transcoding. It returns false for "pipe:" inputs
// (matched case-insensitively), for paths that cannot be stat'ed, and for
// directories; any other existing path yields true.
func hasVideoMetadata(fname string) bool {
	isPipe := strings.HasPrefix(strings.ToLower(fname), "pipe:")
	if isPipe {
		return false
	}

	// Anything that cannot be stat'ed (missing file, permission error, ...)
	// is treated the same way as a directory: nothing to probe.
	if st, statErr := os.Stat(fname); statErr == nil {
		return !st.IsDir()
	}
	return false
}

func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions) (*TranscodeResults, error) {
t.mu.Lock()
defer t.mu.Unlock()
Expand All @@ -861,8 +906,8 @@ func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions)
}
var reopendemux bool
reopendemux = false
// don't read metadata for pipe input, because it can't seek back and av_find_input_format in the decoder will fail
if !strings.HasPrefix(strings.ToLower(input.Fname), "pipe:") {
// don't read metadata for inputs without video metadata, because it can't seek back and av_find_input_format in the decoder will fail
if hasVideoMetadata(input.Fname) {
status, format, err := GetCodecInfo(input.Fname)
if err != nil {
return nil, err
Expand Down Expand Up @@ -942,8 +987,34 @@ func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions)
defer C.free(unsafe.Pointer(fname))
xcoderParams := C.CString("")
defer C.free(unsafe.Pointer(xcoderParams))

var demuxerOpts C.component_opts

ext := filepath.Ext(input.Fname)
// If the input has an image file extension setup the image2 demuxer
if ext == ".png" {
image2 := C.CString("image2")
defer C.free(unsafe.Pointer(image2))

demuxerOpts = C.component_opts{
name: image2,
}

if input.Profile.Framerate > 0 {
if input.Profile.FramerateDen == 0 {
input.Profile.FramerateDen = 1
}

// Do not try to free in this function because in the C code avformat_open_input()
// will destroy this
demuxerOpts.opts = newAVOpts(map[string]string{
"framerate": fmt.Sprintf("%d/%d", input.Profile.Framerate, input.Profile.FramerateDen),
})
}
}

inp := &C.input_params{fname: fname, hw_type: hw_type, device: device, xcoderParams: xcoderParams,
handle: t.handle}
handle: t.handle, demuxer: demuxerOpts}
if input.Transmuxing {
inp.transmuxing = 1
}
Expand Down
87 changes: 86 additions & 1 deletion ffmpeg/ffmpeg_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1873,7 +1873,6 @@ func TestTranscoder_VFR(t *testing.T) {
run, dir := setupTest(t)
defer os.RemoveAll(dir)


// prepare the input by generating a vfr video and verify its properties
cmd := `
ffmpeg -hide_banner -i "$1/../transcoder/test.ts" -an -vf "setpts='\
Expand Down Expand Up @@ -1967,3 +1966,89 @@ PTS_EOF
`
run(cmd)
}

// TestDurationFPS_GetCodecInfo checks that GetCodecInfo and GetCodecInfoBytes
// report the expected container format, duration (in whole seconds) and frame
// rate for a set of video and audio-only inputs. The inputs are produced by a
// shell script that also sanity-checks each of them with ffprobe.
func TestDurationFPS_GetCodecInfo(t *testing.T) {
run, dir := setupTest(t)
defer os.RemoveAll(dir)

// Generate the test files and verify their durations / frame rates with ffprobe
cmd := `
cp "$1/../data/duplicate-audio-dts.ts" test.ts
ffprobe -loglevel warning -show_format test.ts | grep duration=2.008555
ffprobe -loglevel warning -show_streams -select_streams v test.ts | grep r_frame_rate=30/1
cp "$1/../data/bunny.mp4" test.mp4
ffmpeg -loglevel warning -i test.mp4 -c:v copy -c:a copy -t 2 test-short.mp4
ffprobe -loglevel warning -show_format test-short.mp4 | grep duration=2.043356
ffprobe -loglevel warning -show_streams -select_streams v test-short.mp4 | grep r_frame_rate=24/1
ffmpeg -loglevel warning -i test-short.mp4 -c:v libvpx -c:a vorbis -strict -2 -t 2 test.webm
ffprobe -loglevel warning -show_format test.webm | grep duration=2.049000
ffprobe -loglevel warning -show_streams -select_streams v test.webm | grep r_frame_rate=24/1
ffmpeg -loglevel warning -i test-short.mp4 -vn -c:a aac -b:a 128k test.m4a
ffprobe -loglevel warning -show_format test.m4a | grep duration=2.042993
ffmpeg -loglevel warning -i test-short.mp4 -vn -c:a flac test.flac
ffprobe -loglevel warning -show_format test.flac | grep duration=2.043356

ffmpeg -loglevel warning -i test.mp4 -vn -c:a copy stereo-audio.aac
ffprobe -show_entries stream=channels,channel_layout -of csv stereo-audio.aac | grep stream,2,stereo
ffprobe -show_format stereo-audio.aac | grep duration=52.440083

ffmpeg -i test.mp4 -vn stereo-audio.wav
ffprobe -show_format stereo-audio.wav | grep duration=60.139683

cp $1/../data/audio.mp3 test.mp3
ffprobe -show_format test.mp3 | grep duration=1.968000

cp $1/../data/audio.ogg test.ogg
ffprobe -show_format test.ogg | grep duration=1.974500
`
run(cmd)

// Expected results per generated file. Duration is compared against
// MediaFormatInfo.DurSecs (whole seconds); FPS is left at zero for the
// audio-only entries.
files := []struct {
Filename string
Format string
Duration int64
FPS float32

// skip check if bytes version is known to fail duration;
// in that case the bytes variant is expected to report a duration of 0
BytesSkipDuration bool
}{
{Filename: "test-short.mp4", Format: "mov,mp4,m4a,3gp,3g2,mj2", Duration: 2, FPS: 24},
{Filename: "test.ts", Format: "mpegts", Duration: 2, FPS: 30.0, BytesSkipDuration: true},
{Filename: "test.flac", Format: "flac", Duration: 2},
{Filename: "test.webm", Format: "matroska,webm", Duration: 2, FPS: 24},
{Filename: "test.m4a", Format: "mov,mp4,m4a,3gp,3g2,mj2", Duration: 2},
{Filename: "stereo-audio.aac", Format: "aac", Duration: 52},
{Filename: "stereo-audio.wav", Format: "wav", Duration: 60},
{Filename: "test.mp3", Format: "mp3", Duration: 1},
{Filename: "test.ogg", Format: "ogg", Duration: 1, BytesSkipDuration: true},
}
// Each file gets its own subtest, which in turn runs the path-based and the
// in-memory (bytes) probing variants as nested subtests.
for _, file := range files {
t.Run(file.Filename, func(t *testing.T) {
fname := path.Join(dir, file.Filename)
// use 'bytes' prefix to prevent test runner regex matching
for _, tt := range []string{"GetCodecInfo", "BytesGetCodecInfo"} {
t.Run(tt, func(t *testing.T) {
assert := assert.New(t)
// f probes the file either by path or by reading it fully and
// handing the bytes to GetCodecInfoBytes, depending on the variant
f := func() (CodecStatus, MediaFormatInfo, error) {
if tt == "GetCodecInfo" {
return GetCodecInfo(fname)
}
d, err := os.ReadFile(fname)
assert.Nil(err, "reading file")
return GetCodecInfoBytes(d)
}
status, format, err := f()
assert.Nil(err, "getcodecinfo error")
assert.Equal(CodecStatusOk, status, "status not ok")
assert.Equal(file.Format, format.Format, "format mismatch")
// For inputs flagged BytesSkipDuration the bytes variant is expected
// to come back without a duration rather than with the real one
if tt == "BytesGetCodecInfo" && file.BytesSkipDuration {
assert.Equal(int64(0), format.DurSecs, "special duration mismatch")
} else {
assert.Equal(file.Duration, format.DurSecs, "duration mismatch")
}
assert.Equal(file.FPS, format.FPS, "fps mismatch")
})
}
})
}
}
10 changes: 8 additions & 2 deletions ffmpeg/transcoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,17 +160,23 @@ int transcode_init(struct transcode_thread *h, input_params *inp,

if (!inp) LPMS_ERR(transcode_cleanup, "Missing input params")

AVDictionary **demuxer_opts;
if (inp->demuxer.opts) demuxer_opts = &inp->demuxer.opts;

// by default we re-use decoder between segments of same stream
// unless we are using SW decoder and had to re-open IO or demuxer
if (!ictx->ic) {
// reopen demuxer for the input segment if needed
// XXX could open_input() be re-used here?
ret = avformat_open_input(&ictx->ic, inp->fname, NULL, NULL);
ret = avformat_open_input(&ictx->ic, inp->fname, NULL, demuxer_opts);
if (ret < 0) LPMS_ERR(transcode_cleanup, "Unable to reopen demuxer");
// If avformat_open_input replaced the options AVDictionary with options that were not found free it
if (demuxer_opts) av_dict_free(demuxer_opts);
ret = avformat_find_stream_info(ictx->ic, NULL);
if (ret < 0) LPMS_ERR(transcode_cleanup, "Unable to find info for reopened stream")
} else if (!ictx->ic->pb) {
} else if (is_mpegts(ictx->ic) && !ictx->ic->pb) {
// reopen input segment file IO context if needed
// only necessary for mpegts
ret = avio_open(&ictx->ic->pb, inp->fname, AVIO_FLAG_READ);
if (ret < 0) LPMS_ERR(transcode_cleanup, "Unable to reopen file");
} else reopen_decoders = 0;
Expand Down
2 changes: 2 additions & 0 deletions ffmpeg/transcoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ typedef struct {
char *device;
char *xcoderParams;

// Optional demuxer opts
component_opts demuxer;
// Optional video decoder + opts
component_opts video;

Expand Down
8 changes: 4 additions & 4 deletions install_ffmpeg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -209,13 +209,13 @@ if [[ ! -e "$ROOT/ffmpeg/libavcodec/libavcodec.a" ]]; then
./configure ${TARGET_OS:-} $DISABLE_FFMPEG_COMPONENTS --fatal-warnings \
--enable-libx264 --enable-gpl \
--enable-protocol=rtmp,file,pipe \
--enable-muxer=mpegts,hls,segment,mp4,hevc,matroska,webm,null --enable-demuxer=flv,mpegts,mp4,mov,webm,matroska \
--enable-muxer=mp3,wav,flac,mpegts,hls,segment,mp4,hevc,matroska,webm,null --enable-demuxer=mp3,wav,flac,flv,mpegts,mp4,mov,webm,matroska,image2 \
--enable-bsf=h264_mp4toannexb,aac_adtstoasc,h264_metadata,h264_redundant_pps,hevc_mp4toannexb,extract_extradata \
--enable-parser=aac,aac_latm,h264,hevc,vp8,vp9 \
--enable-parser=mpegaudio,vorbis,opus,flac,aac,aac_latm,h264,hevc,vp8,vp9,png \
--enable-filter=abuffer,buffer,abuffersink,buffersink,afifo,fifo,aformat,format \
--enable-filter=aresample,asetnsamples,fps,scale,hwdownload,select,livepeer_dnn,signature \
--enable-encoder=aac,opus,libx264 \
--enable-decoder=aac,opus,h264 \
--enable-encoder=mp3,vorbis,flac,aac,opus,libx264 \
--enable-decoder=mp3,vorbis,flac,aac,opus,h264,png \
--extra-cflags="${EXTRA_CFLAGS} -I${ROOT}/compiled/include -I/usr/local/cuda/include" \
--extra-ldflags="${EXTRA_FFMPEG_LDFLAGS} -L${ROOT}/compiled/lib -L/usr/local/cuda/lib64" \
--prefix="$ROOT/compiled" \
Expand Down
Loading