diff --git a/c_src/xav/decoder.c b/c_src/xav/decoder.c index a71838d..ab088a3 100644 --- a/c_src/xav/decoder.c +++ b/c_src/xav/decoder.c @@ -9,11 +9,12 @@ struct Decoder *decoder_alloc() { decoder->codec = NULL; decoder->c = NULL; + decoder->out_format = AV_PIX_FMT_NONE; return decoder; } -int decoder_init(struct Decoder *decoder, const char *codec) { +int decoder_init(struct Decoder *decoder, const char *codec, const char* out_format) { if (strcmp(codec, "opus") == 0) { decoder->media_type = AVMEDIA_TYPE_AUDIO; decoder->codec = avcodec_find_decoder(AV_CODEC_ID_OPUS); @@ -25,7 +26,7 @@ int decoder_init(struct Decoder *decoder, const char *codec) { decoder->codec = avcodec_find_decoder(AV_CODEC_ID_H264); } else if (strcmp(codec, "h265") == 0) { decoder->media_type = AVMEDIA_TYPE_VIDEO; - decoder->codec = avcodec_find_decoder(AV_CODEC_ID_H265); + decoder->codec = avcodec_find_decoder(AV_CODEC_ID_HEVC); } else { return -1; } @@ -34,6 +35,13 @@ int decoder_init(struct Decoder *decoder, const char *codec) { return -1; } + if(decoder->media_type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) { + decoder->out_format = av_get_pix_fmt(out_format); + if (decoder->out_format == AV_PIX_FMT_NONE) { + return -1; + } + } + decoder->c = avcodec_alloc_context3(decoder->codec); if (!decoder->c) { return -1; diff --git a/c_src/xav/decoder.h b/c_src/xav/decoder.h index a4275fe..d00ef06 100644 --- a/c_src/xav/decoder.h +++ b/c_src/xav/decoder.h @@ -8,6 +8,7 @@ struct Decoder { enum AVMediaType media_type; + enum AVPixelFormat out_format; AVFrame *frame; AVPacket *pkt; const AVCodec *codec; @@ -16,7 +17,7 @@ struct Decoder { struct Decoder *decoder_alloc(); -int decoder_init(struct Decoder *decoder, const char *codec); +int decoder_init(struct Decoder *decoder, const char *codec, const char* out_format); int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame); diff --git a/c_src/xav/utils.c b/c_src/xav/utils.c index 9d58f27..76e3cba 100644 --- 
a/c_src/xav/utils.c +++ b/c_src/xav/utils.c @@ -1,5 +1,6 @@ #include "utils.h" #include +#include #include #include @@ -33,13 +34,16 @@ ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int return enif_make_tuple(env, 4, data_term, format_term, samples_term, pts_term); } -ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame, uint8_t *data[4], - int linesize[4], const char *format_name) { +ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame) { ERL_NIF_TERM data_term; - unsigned char *ptr = enif_make_new_binary(env, linesize[0] * frame->height, &data_term); - memcpy(ptr, data[0], linesize[0] * frame->height); - ERL_NIF_TERM format_term = enif_make_atom(env, format_name); + int payload_size = av_image_get_buffer_size(frame->format, frame->width, frame->height, 1); + unsigned char *ptr = enif_make_new_binary(env, payload_size, &data_term); + + av_image_copy_to_buffer(ptr, payload_size, (const uint8_t *const *)frame->data, + (const int*)frame->linesize, frame->format, frame->width, frame->height, 1); + + ERL_NIF_TERM format_term = enif_make_atom(env, av_get_pix_fmt_name(frame->format)); ERL_NIF_TERM height_term = enif_make_int(env, frame->height); ERL_NIF_TERM width_term = enif_make_int(env, frame->width); ERL_NIF_TERM pts_term = enif_make_int64(env, frame->pts); diff --git a/c_src/xav/utils.h b/c_src/xav/utils.h index 9cd5891..78b2f30 100644 --- a/c_src/xav/utils.h +++ b/c_src/xav/utils.h @@ -18,7 +18,6 @@ ERL_NIF_TERM xav_nif_ok(ErlNifEnv *env, ERL_NIF_TERM data_term); ERL_NIF_TERM xav_nif_error(ErlNifEnv *env, char *reason); ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg); -ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame, uint8_t *out_data[4], - int out_linesize[4], const char *out_format); +ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame); ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples, int out_size, 
const char *out_format, int pts); diff --git a/c_src/xav/video_converter.c b/c_src/xav/video_converter.c index 446d5ad..9b3f42b 100644 --- a/c_src/xav/video_converter.c +++ b/c_src/xav/video_converter.c @@ -1,25 +1,38 @@ #include "video_converter.h" -int video_converter_convert(AVFrame *src_frame, uint8_t *out_data[], int out_linesize[]) { +int video_converter_convert(AVFrame *src_frame, AVFrame **dst_frame, enum AVPixelFormat out_format) { int ret; + *dst_frame = av_frame_alloc(); + if (!*dst_frame) { + return -1; + } + + (*dst_frame)->width = src_frame->width; + (*dst_frame)->height = src_frame->height; + (*dst_frame)->format = out_format; + (*dst_frame)->pts = src_frame->pts; + + ret = av_frame_get_buffer(*dst_frame, 0); + if (ret < 0) { + return ret; + } + struct SwsContext *sws_ctx = sws_getContext(src_frame->width, src_frame->height, src_frame->format, src_frame->width, - src_frame->height, AV_PIX_FMT_RGB24, SWS_BILINEAR, NULL, NULL, NULL); - - ret = av_image_alloc(out_data, out_linesize, src_frame->width, src_frame->height, - AV_PIX_FMT_RGB24, 1); + src_frame->height, out_format, SWS_BILINEAR, NULL, NULL, NULL); if (ret < 0) { return ret; } + // is this (const uint8_t * const*) cast really correct? 
ret = sws_scale(sws_ctx, (const uint8_t *const *)src_frame->data, src_frame->linesize, 0, - src_frame->height, out_data, out_linesize); + src_frame->height, (*dst_frame)->data, (*dst_frame)->linesize); if (ret < 0) { - av_freep(&out_data[0]); + av_frame_free(dst_frame); sws_freeContext(sws_ctx); return ret; } @@ -27,4 +40,4 @@ int video_converter_convert(AVFrame *src_frame, uint8_t *out_data[], int out_lin sws_freeContext(sws_ctx); return ret; -} +} \ No newline at end of file diff --git a/c_src/xav/video_converter.h b/c_src/xav/video_converter.h index 4504d45..d53670c 100644 --- a/c_src/xav/video_converter.h +++ b/c_src/xav/video_converter.h @@ -5,4 +5,4 @@ #include #include -int video_converter_convert(AVFrame *src_frame, uint8_t *out_data[4], int out_linesize[4]); +int video_converter_convert(AVFrame *src_frame, AVFrame **dst_frame, enum AVPixelFormat out_format); diff --git a/c_src/xav/xav_decoder.c b/c_src/xav/xav_decoder.c index a7f7374..10c188c 100644 --- a/c_src/xav/xav_decoder.c +++ b/c_src/xav/xav_decoder.c @@ -62,7 +62,7 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { return xav_nif_raise(env, "failed_to_allocate_decoder"); } - if (decoder_init(xav_decoder->decoder, codec) != 0) { + if (decoder_init(xav_decoder->decoder, codec, xav_decoder->out_format) != 0) { return xav_nif_raise(env, "failed_to_init_decoder"); } @@ -79,17 +79,21 @@ ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame* fr if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_VIDEO) { XAV_LOG_DEBUG("Converting video to RGB"); - uint8_t *out_data[4]; - int out_linesize[4]; + int out_pix_fmt = xav_decoder->decoder->out_format; - ret = video_converter_convert(frame, out_data, out_linesize); + if (out_pix_fmt == AV_PIX_FMT_NONE) { + return xav_nif_video_frame_to_term(env, frame); + } + + AVFrame *dst_frame; + ret = video_converter_convert(frame, &dst_frame, out_pix_fmt); if (ret <= 0) { return xav_nif_raise(env, "failed_to_decode"); } - 
frame_term = xav_nif_video_frame_to_term(env, frame, out_data, out_linesize, "rgb"); + frame_term = xav_nif_video_frame_to_term(env, dst_frame); - av_freep(&out_data[0]); + av_frame_free(&dst_frame); } else if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_AUDIO) { XAV_LOG_DEBUG("Converting audio to desired out format"); diff --git a/c_src/xav/xav_reader.c b/c_src/xav/xav_reader.c index 108dbae..fcaf0d8 100644 --- a/c_src/xav/xav_reader.c +++ b/c_src/xav/xav_reader.c @@ -147,18 +147,15 @@ ERL_NIF_TERM next_frame(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { if (xav_reader->reader->media_type == AVMEDIA_TYPE_VIDEO) { XAV_LOG_DEBUG("Converting video to RGB"); - uint8_t *out_data[4]; - int out_linesize[4]; - - ret = video_converter_convert(xav_reader->reader->frame, out_data, out_linesize); + AVFrame *dst_frame; + ret = video_converter_convert(xav_reader->reader->frame, &dst_frame, AV_PIX_FMT_RGB24); if (ret <= 0) { return xav_nif_raise(env, "failed_to_read"); } - frame_term = - xav_nif_video_frame_to_term(env, xav_reader->reader->frame, out_data, out_linesize, "rgb"); + frame_term = xav_nif_video_frame_to_term(env, dst_frame); - av_freep(&out_data[0]); + av_frame_free(&dst_frame); } else if (xav_reader->reader->media_type == AVMEDIA_TYPE_AUDIO) { XAV_LOG_DEBUG("Converting audio to desired out format"); diff --git a/lib/decoder.ex b/lib/decoder.ex index c46e7bc..ca3cef9 100644 --- a/lib/decoder.ex +++ b/lib/decoder.ex @@ -30,8 +30,11 @@ defmodule Xav.Decoder do [out_format: :f32] ``` - Video frames are always returned in RGB format. - This setting cannot be changed. + or the video pixel format: + + ```elixir + [out_format: :rgb24] + ``` Audio samples are always in the packed form - samples from different channels are interleaved in the same, single binary: diff --git a/lib/frame.ex b/lib/frame.ex index 4486015..8b03ba6 100644 --- a/lib/frame.ex +++ b/lib/frame.ex @@ -11,9 +11,15 @@ defmodule Xav.Frame do @typedoc """ Possible video frame formats. 
- Currently, only RGB is supported. + All `ffmpeg` pixel formats are accepted. For a complete list, run: + + ```sh + ffmpeg -pix_fmts + ``` + + An example of a pixel format is `:rgb24`. """ - @type video_format() :: :rgb + @type video_format() :: atom() @type format() :: audio_format() | video_format() diff --git a/test/decoder_test.exs b/test/decoder_test.exs index 3e3737e..9baf81f 100644 --- a/test/decoder_test.exs +++ b/test/decoder_test.exs @@ -286,7 +286,11 @@ defmodule Xav.DecoderTest do 142, 204, 5, 106, 217, 175, 162, 62, 128, 161, 69, 136, 234, 30, 43, 165, 152, 104, 143>> + # Use ffmpeg to extract the first frame of the video + # ffmpeg -i sample_video.mp4 -c:v copy -f h264 -vframes 1 sample_h264.h264 @h264_frame File.read!("test/fixtures/decoder/sample_h264.h264") + # You can do the same for hevc given that the mp4 file contains a hevc stream + # ffmpeg -i sample_video.mp4 -c:v copy -f hevc -vframes 1 sample_h265.h265 @h265_frame File.read!("test/fixtures/decoder/sample_h265.h265") test "new/0" do @@ -323,8 +327,10 @@ defmodule Xav.DecoderTest do test "video keyframe" do decoder = Xav.Decoder.new(:vp8) - assert {:ok, %Xav.Frame{width: 640, height: 480, pts: 0, format: :rgb}} = + assert {:ok, %Xav.Frame{width: 640, height: 480, pts: 0, data: frame, format: :yuv420p}} = Xav.Decoder.decode(decoder, @vp8_keyframe) + + assert byte_size(frame) == 640 * 480 * 3 / 2 end test "video without prior keyframe" do @@ -338,7 +344,7 @@ defmodule Xav.DecoderTest do assert :ok = Xav.Decoder.decode(decoder, @h264_frame) - assert {:ok, [%Xav.Frame{width: 1280, height: 720, pts: 0, format: :rgb}]} = + assert {:ok, [%Xav.Frame{width: 1280, height: 720, pts: 0, format: :yuv420p}]} = Xav.Decoder.flush(decoder) end @@ -347,8 +353,18 @@ defmodule Xav.DecoderTest do assert :ok = Xav.Decoder.decode(decoder, @h265_frame) - assert {:ok, [%Xav.Frame{width: 1920, height: 1080, pts: 0, 
format: :yuv420p}]} = Xav.Decoder.flush(decoder) end + + test "convert video frame" do + decoder = Xav.Decoder.new(:vp8, out_format: :rgb24) + + assert {:ok, %Xav.Frame{width: 640, height: 480, pts: 0, data: frame, format: :rgb24}} = + Xav.Decoder.decode(decoder, @vp8_keyframe) + + assert byte_size(frame) == 640 * 480 * 3 + end + end end end