From 56253a4fb07ffb87d4bd94743bfc31cee5eaf4aa Mon Sep 17 00:00:00 2001 From: Billal Ghilas <84322223+gBillal@users.noreply.github.com> Date: Sun, 19 Jan 2025 20:09:09 +0100 Subject: [PATCH] Add scaling support to converter and decoder (#27) --- c_src/xav/video_converter.c | 65 ++++++++++++++++------- c_src/xav/video_converter.h | 19 ++++--- c_src/xav/xav_decoder.c | 90 +++++++++++++++++++------------ c_src/xav/xav_decoder.h | 8 ++- c_src/xav/xav_reader.c | 6 +-- c_src/xav/xav_reader.h | 2 +- c_src/xav/xav_video_converter.c | 93 +++++++++++++++++++++------------ c_src/xav/xav_video_converter.h | 10 ++-- lib/decoder.ex | 8 ++- lib/decoder_nif.ex | 4 +- lib/frame.ex | 7 ++- lib/video_converter.ex | 84 ++++++++++++++++++++++------- lib/video_converter_nif.ex | 2 +- test/decoder_test.exs | 9 ++++ test/video_converter_test.exs | 91 ++++++++++++++++++++------------ 15 files changed, 334 insertions(+), 164 deletions(-) diff --git a/c_src/xav/video_converter.c b/c_src/xav/video_converter.c index ded0f78..6565af7 100644 --- a/c_src/xav/video_converter.c +++ b/c_src/xav/video_converter.c @@ -1,41 +1,65 @@ #include "video_converter.h" #include "utils.h" -static inline unsigned int video_converter_resolution_changed(struct VideoConverter *converter, AVFrame *frame) { - return converter->in_format != frame->format || - converter->in_width != frame->width || - converter->in_height != frame->height; +static inline unsigned int video_converter_resolution_changed(struct VideoConverter *converter, + AVFrame *frame) { + return converter->in_format != frame->format || converter->in_width != frame->width || + converter->in_height != frame->height; } struct VideoConverter *video_converter_alloc() { struct VideoConverter *converter = (struct VideoConverter *)XAV_ALLOC(sizeof(struct VideoConverter)); - if(converter) { + if (converter) { converter->sws_ctx = NULL; converter->dst_frame = av_frame_alloc(); } return converter; } -int video_converter_init(struct VideoConverter *converter, int in_width, int in_height, - enum AVPixelFormat in_format, enum AVPixelFormat out_format) { +int video_converter_init(struct VideoConverter *converter, int in_width, int in_height, + enum AVPixelFormat in_format, int out_width, int out_height, + enum AVPixelFormat out_format) { converter->in_width = in_width; converter->in_height = in_height; converter->in_format = in_format; - converter->out_format = out_format; - av_frame_unref(converter->dst_frame); + converter->out_width = out_width; + converter->out_height = out_height; + converter->out_format = out_format; - converter->dst_frame->width = in_width; - converter->dst_frame->height = in_height; - converter->dst_frame->format = out_format; + AVFrame *dst_frame = converter->dst_frame; + av_frame_unref(dst_frame); + + dst_frame->format = out_format; + + if (out_width == -1 && out_height == -1) { + dst_frame->width = in_width; + dst_frame->height = in_height; + } else if (out_width == -1) { + int width = in_width * out_height / in_height; + width = width + (width % 2); + + dst_frame->width = width; + dst_frame->height = out_height; + } else if (out_height == -1) { + int height = in_height * out_width / in_width; + height = height + (height % 2); + + dst_frame->width = out_width; + dst_frame->height = height; + } else { + dst_frame->width = out_width; + dst_frame->height = out_height; + } - int ret = av_frame_get_buffer(converter->dst_frame, 0); + int ret = av_frame_get_buffer(dst_frame, 0); if (ret < 0) return ret; - converter->sws_ctx = sws_getContext(in_width, in_height, in_format, in_width, in_height, out_format, - SWS_BILINEAR, NULL, NULL, NULL); + converter->sws_ctx = + sws_getContext(in_width, in_height, in_format, dst_frame->width, dst_frame->height, + dst_frame->format, SWS_BILINEAR, NULL, NULL, NULL); if (!converter->sws_ctx) { XAV_LOG_DEBUG("Couldn't get sws context"); @@ -51,8 +75,8 @@ int video_converter_convert(struct VideoConverter *converter, AVFrame *src_frame if (video_converter_resolution_changed(converter, src_frame)) { XAV_LOG_DEBUG("Frame resolution changed"); sws_freeContext(converter->sws_ctx); - ret = video_converter_init(converter, src_frame->width, src_frame->height, - src_frame->format, converter->out_format); + ret = video_converter_init(converter, src_frame->width, src_frame->height, src_frame->format, + converter->out_width, converter->out_height, converter->out_format); if (ret < 0) { return ret; } @@ -61,12 +85,13 @@ int video_converter_convert(struct VideoConverter *converter, AVFrame *src_frame converter->dst_frame->pts = src_frame->pts; // is this (const uint8_t * const*) cast really correct? - return sws_scale(converter->sws_ctx, (const uint8_t *const *)src_frame->data, src_frame->linesize, 0, - src_frame->height, converter->dst_frame->data, converter->dst_frame->linesize); + return sws_scale(converter->sws_ctx, (const uint8_t *const *)src_frame->data, src_frame->linesize, + 0, src_frame->height, converter->dst_frame->data, + converter->dst_frame->linesize); } void video_converter_free(struct VideoConverter **converter) { - struct VideoConverter* vc = *converter; + struct VideoConverter *vc = *converter; if (vc != NULL) { if (vc->sws_ctx != NULL) { sws_freeContext((*converter)->sws_ctx); diff --git a/c_src/xav/video_converter.h b/c_src/xav/video_converter.h index c716fc7..4e5d704 100644 --- a/c_src/xav/video_converter.h +++ b/c_src/xav/video_converter.h @@ -6,18 +6,21 @@ #include struct VideoConverter { - struct SwsContext *sws_ctx; - int in_width; - int in_height; - enum AVPixelFormat in_format; - enum AVPixelFormat out_format; - AVFrame *dst_frame; + struct SwsContext *sws_ctx; + int in_width; + int in_height; + enum AVPixelFormat in_format; + int out_width; + int out_height; + enum AVPixelFormat out_format; + AVFrame *dst_frame; }; struct VideoConverter *video_converter_alloc(); -int video_converter_init(struct VideoConverter* converter, int in_width, int in_height, - enum AVPixelFormat in_format, enum AVPixelFormat out_format); +int video_converter_init(struct VideoConverter *converter, int in_width, int in_height, + enum AVPixelFormat in_format, int out_width, int out_height, + enum AVPixelFormat out_format); int video_converter_convert(struct VideoConverter *converter, AVFrame *src_frame); diff --git a/c_src/xav/xav_decoder.c b/c_src/xav/xav_decoder.c index f5b9445..4fdd462 100644 --- a/c_src/xav/xav_decoder.c +++ b/c_src/xav/xav_decoder.c @@ -12,19 +12,17 @@ void free_frames(AVFrame **frames, int size) { } } -ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - if (argc != 4) { +ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (argc != 6) { return xav_nif_raise(env, "invalid_arg_count"); } - // resolve codec - unsigned int codec_len; - if (!enif_get_atom_length(env, argv[0], &codec_len, ERL_NIF_LATIN1)) { - return xav_nif_raise(env, "failed_to_get_atom_length"); - } + ERL_NIF_TERM ret; + char *codec = NULL; + char *out_format = NULL; - char *codec = (char *)XAV_ALLOC((codec_len + 1) * sizeof(char *)); - if (enif_get_atom(env, argv[0], codec, codec_len + 1, ERL_NIF_LATIN1) == 0) { + // resolve codec + if (!xav_get_atom(env, argv[0], &codec)) { return xav_nif_raise(env, "failed_to_get_atom"); } @@ -39,22 +37,18 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { } else if (strcmp(codec, "h264") == 0) { media_type = AVMEDIA_TYPE_VIDEO; codec_id = AV_CODEC_ID_H264; - } else if (strcmp(codec, "h265") == 0) { + } else if (strcmp(codec, "h265") == 0 || strcmp(codec, "hevc") == 0) { media_type = AVMEDIA_TYPE_VIDEO; codec_id = AV_CODEC_ID_HEVC; } else { - return xav_nif_raise(env, "failed_to_resolve_codec"); + ret = xav_nif_raise(env, "failed_to_resolve_codec"); + goto clean; } // resolve output format - unsigned int out_format_len; - if (!enif_get_atom_length(env, argv[1], &out_format_len, ERL_NIF_LATIN1)) { - return xav_nif_raise(env, "failed_to_get_atom_length"); - } - - char *out_format = (char *)XAV_ALLOC((out_format_len + 1) * sizeof(char *)); - if (enif_get_atom(env, argv[1], out_format, out_format_len + 1, ERL_NIF_LATIN1) == 0) { - return xav_nif_raise(env, "failed_to_get_atom"); + if (!xav_get_atom(env, argv[1], &out_format)) { + ret = xav_nif_raise(env, "failed_to_get_atom"); + goto clean; } enum AVPixelFormat out_video_fmt = AV_PIX_FMT_NONE; @@ -62,24 +56,40 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { if (media_type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) { out_video_fmt = av_get_pix_fmt(out_format); if (out_video_fmt == AV_PIX_FMT_NONE) { - return xav_nif_raise(env, "unknown_out_format"); + ret = xav_nif_raise(env, "unknown_out_format"); + goto clean; } } else if (media_type == AVMEDIA_TYPE_AUDIO && strcmp(out_format, "nil") != 0) { out_audo_fmt = av_get_sample_fmt(out_format); if (out_audo_fmt == AV_SAMPLE_FMT_NONE) { - return xav_nif_raise(env, "unknown_out_format"); + ret = xav_nif_raise(env, "unknown_out_format"); + goto clean; } } // resolve other params int out_sample_rate; if (!enif_get_int(env, argv[2], &out_sample_rate)) { - return xav_nif_raise(env, "invalid_out_sample_rate"); + ret = xav_nif_raise(env, "invalid_out_sample_rate"); + goto clean; } int out_channels; if (!enif_get_int(env, argv[3], &out_channels)) { - return xav_nif_raise(env, "invalid_out_channels"); + ret = xav_nif_raise(env, "invalid_out_channels"); + goto clean; + } + + int out_width; + if (!enif_get_int(env, argv[4], &out_width)) { + ret = xav_nif_raise(env, "failed_to_get_int"); + goto clean; + } + + int out_height; + if (!enif_get_int(env, argv[5], &out_height)) { + ret = xav_nif_raise(env, "failed_to_get_int"); + goto clean; } struct XavDecoder *xav_decoder = @@ -87,26 +97,34 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { xav_decoder->decoder = NULL; xav_decoder->ac = NULL; xav_decoder->vc = NULL; - xav_decoder->out_audio_fmt = out_audo_fmt; xav_decoder->out_video_fmt = out_video_fmt; + xav_decoder->out_width = out_width; + xav_decoder->out_height = out_height; + xav_decoder->out_audio_fmt = out_audo_fmt; xav_decoder->out_sample_rate = out_sample_rate; xav_decoder->out_channels = out_channels; xav_decoder->decoder = decoder_alloc(); if (xav_decoder->decoder == NULL) { - return xav_nif_raise(env, "failed_to_allocate_decoder"); + ret = xav_nif_raise(env, "failed_to_allocate_decoder"); + goto clean; } if (decoder_init(xav_decoder->decoder, media_type, codec_id) != 0) { - return xav_nif_raise(env, "failed_to_init_decoder"); + ret = xav_nif_raise(env, "failed_to_init_decoder"); + goto clean; } - ERL_NIF_TERM decoder_term = enif_make_resource(env, xav_decoder); + ret = enif_make_resource(env, xav_decoder); enif_release_resource(xav_decoder); - XAV_FREE(out_format); +clean: + if (codec != NULL) + XAV_FREE(codec); + if (out_format != NULL) + XAV_FREE(out_format); - return decoder_term; + return ret; } ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame *frame) { @@ -116,7 +134,9 @@ ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame *fr if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_VIDEO) { XAV_LOG_DEBUG("Converting video to RGB"); - if (xav_decoder->out_video_fmt == AV_PIX_FMT_NONE) { + // no pixel format conversion and no scaling + if (xav_decoder->out_video_fmt == AV_PIX_FMT_NONE && xav_decoder->out_width == -1 && + xav_decoder->out_height == -1) { return xav_nif_video_frame_to_term(env, frame); } @@ -299,8 +319,12 @@ static int init_video_converter(struct XavDecoder *xav_decoder, AVFrame *frame) return -1; } - return video_converter_init(xav_decoder->vc, frame->width, frame->height, - frame->format, xav_decoder->out_video_fmt); + enum AVPixelFormat out_format = xav_decoder->out_video_fmt; + if (out_format == AV_PIX_FMT_NONE) + out_format = frame->format; + + return video_converter_init(xav_decoder->vc, frame->width, frame->height, frame->format, + xav_decoder->out_width, xav_decoder->out_height, out_format); } void free_xav_decoder(ErlNifEnv *env, void *obj) { @@ -319,7 +343,7 @@ void free_xav_decoder(ErlNifEnv *env, void *obj) { } } -static ErlNifFunc xav_funcs[] = {{"new", 4, new}, +static ErlNifFunc xav_funcs[] = {{"new", 6, new}, {"decode", 4, decode, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"flush", 1, flush, ERL_NIF_DIRTY_JOB_CPU_BOUND}}; diff --git a/c_src/xav/xav_decoder.h b/c_src/xav/xav_decoder.h index 15813fa..9b29111 100644 --- a/c_src/xav/xav_decoder.h +++ b/c_src/xav/xav_decoder.h @@ -1,14 +1,18 @@ #include "audio_converter.h" -#include "video_converter.h" #include "decoder.h" +#include "video_converter.h" #include struct XavDecoder { struct Decoder *decoder; - struct AudioConverter *ac; + // Video params struct VideoConverter *vc; enum AVPixelFormat out_video_fmt; + int out_width; + int out_height; + // Audio params + struct AudioConverter *ac; enum AVSampleFormat out_audio_fmt; int out_sample_rate; int out_channels; diff --git a/c_src/xav/xav_reader.c b/c_src/xav/xav_reader.c index b0ef045..33c612c 100644 --- a/c_src/xav/xav_reader.c +++ b/c_src/xav/xav_reader.c @@ -5,7 +5,7 @@ static int init_video_converter(struct XavReader *xav_reader, AVFrame *frame); ErlNifResourceType *xav_reader_resource_type; -ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { +ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { if (argc != 6) { return xav_nif_raise(env, "invalid_arg_count"); } @@ -290,8 +290,8 @@ static int init_video_converter(struct XavReader *xav_reader, AVFrame *frame) { return -1; } - return video_converter_init(xav_reader->vc, frame->width, frame->height, - frame->format, AV_PIX_FMT_RGB24); + return video_converter_init(xav_reader->vc, frame->width, frame->height, frame->format, + frame->width, frame->height, AV_PIX_FMT_RGB24); } void free_xav_reader(ErlNifEnv *env, void *obj) { diff --git a/c_src/xav/xav_reader.h b/c_src/xav/xav_reader.h index 4e22b8f..3849aa8 100644 --- a/c_src/xav/xav_reader.h +++ b/c_src/xav/xav_reader.h @@ -1,6 +1,6 @@ #include "audio_converter.h" -#include "video_converter.h" #include "reader.h" +#include "video_converter.h" struct XavReader { struct Reader *reader; diff --git a/c_src/xav/xav_video_converter.c b/c_src/xav/xav_video_converter.c index 16dc660..8f498a5 100644 --- a/c_src/xav/xav_video_converter.c +++ b/c_src/xav/xav_video_converter.c @@ -1,38 +1,68 @@ #include "xav_video_converter.h" -ErlNifResourceType * xav_video_converter_resource_type; +ErlNifResourceType *xav_video_converter_resource_type; -ERL_NIF_TERM new(ErlNifEnv * env, int argc, const ERL_NIF_TERM argv[]) { - if (argc != 1) { +static int init_video_converter(struct XavVideoConverter *converter) { + converter->vc = video_converter_alloc(); + if (converter->vc == NULL) { + return -1; + } + + AVFrame *in_frame = converter->frame; + + enum AVPixelFormat out_pix_fmt = converter->out_format; + if (out_pix_fmt == AV_PIX_FMT_NONE) { + out_pix_fmt = in_frame->format; + } + + return video_converter_init(converter->vc, in_frame->width, in_frame->height, in_frame->format, + converter->out_width, converter->out_height, out_pix_fmt); +} + +ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (argc != 3) { return xav_nif_error(env, "invalid_arg_count"); } ERL_NIF_TERM ret; - enum AVPixelFormat pix_fmt; + enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE; + int width, height; char *format = NULL; - if(!xav_get_atom(env, argv[0], &format)) { + if (!xav_get_atom(env, argv[0], &format)) { return xav_nif_raise(env, "failed_to_get_atom"); } - pix_fmt = av_get_pix_fmt(format); - if (pix_fmt == AV_PIX_FMT_NONE) { - ret = xav_nif_raise(env, "unknown_format"); - goto fail; + if (strcmp(format, "nil") != 0) { + pix_fmt = av_get_pix_fmt(format); + if (pix_fmt == AV_PIX_FMT_NONE) { + ret = xav_nif_raise(env, "unknown_format"); + goto clean; + } + } + + if (!enif_get_int(env, argv[1], &width)) { + ret = xav_nif_raise(env, "failed_to_get_int"); + goto clean; + } + + if (!enif_get_int(env, argv[2], &height)) { + ret = xav_nif_raise(env, "failed_to_get_int"); + goto clean; } - struct XavVideoConverter *xav_video_converter = enif_alloc_resource(xav_video_converter_resource_type, - sizeof(xav_video_converter)); + struct XavVideoConverter *xav_video_converter = + enif_alloc_resource(xav_video_converter_resource_type, sizeof(struct XavVideoConverter)); xav_video_converter->vc = NULL; xav_video_converter->frame = av_frame_alloc(); xav_video_converter->out_format = pix_fmt; + xav_video_converter->out_width = width; + xav_video_converter->out_height = height; - ERL_NIF_TERM converter_term = enif_make_resource(env, xav_video_converter); + ret = enif_make_resource(env, xav_video_converter); enif_release_resource(xav_video_converter); - ret = xav_nif_ok(env, converter_term); - -fail: +clean: XAV_FREE(format); return ret; @@ -44,7 +74,8 @@ ERL_NIF_TERM convert(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { } struct XavVideoConverter *xav_video_converter; - if (!enif_get_resource(env, argv[0], xav_video_converter_resource_type, (void**) &xav_video_converter)) { + if (!enif_get_resource(env, argv[0], xav_video_converter_resource_type, + (void **)&xav_video_converter)) { return xav_nif_raise(env, "couldnt_get_converter_resource"); } @@ -81,23 +112,16 @@ ERL_NIF_TERM convert(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { src_frame->height = height; src_frame->format = pix_fmt; - ret = av_image_fill_arrays(src_frame->data, src_frame->linesize, in_data.data, - src_frame->format, width, height, 1); - + int int_ret = av_image_fill_arrays(src_frame->data, src_frame->linesize, in_data.data, + src_frame->format, width, height, 1); - if (ret < 0) { + if (int_ret < 0) { ret = xav_nif_raise(env, "failed_to_fill_arrays"); goto clean; } if (xav_video_converter->vc == NULL) { - xav_video_converter->vc = video_converter_alloc(); - if (xav_video_converter->vc == NULL) { - ret = xav_nif_raise(env, "failed_to_allocate_converter"); - goto clean; - } - - if (video_converter_init(xav_video_converter->vc, width, height, pix_fmt, xav_video_converter->out_format) < 0) { + if (init_video_converter(xav_video_converter) < 0) { ret = xav_nif_raise(env, "failed_to_init_converter"); goto clean; } @@ -111,14 +135,15 @@ ERL_NIF_TERM convert(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { ret = xav_nif_video_frame_to_term(env, xav_video_converter->vc->dst_frame); clean: - if (format != NULL) XAV_FREE(format); + if (format != NULL) + XAV_FREE(format); return ret; } -void free_xav_video_converter(ErlNifEnv * env, void * obj) { +void free_xav_video_converter(ErlNifEnv *env, void *obj) { XAV_LOG_DEBUG("Freeing XavVideoConverter object"); - struct XavVideoConverter * xav_video_converter = (struct XavVideoConverter * ) obj; + struct XavVideoConverter *xav_video_converter = (struct XavVideoConverter *)obj; if (xav_video_converter->vc != NULL) { video_converter_free(&xav_video_converter->vc); } @@ -126,12 +151,12 @@ void free_xav_video_converter(ErlNifEnv * env, void * obj) { av_frame_free(&xav_video_converter->frame); } -static ErlNifFunc xav_funcs[] = {{"new", 1, new}, +static ErlNifFunc xav_funcs[] = {{"new", 3, new}, {"convert", 5, convert, ERL_NIF_DIRTY_JOB_CPU_BOUND}}; -static int load(ErlNifEnv * env, void ** priv, ERL_NIF_TERM load_info) { - xav_video_converter_resource_type = - enif_open_resource_type(env, NULL, "XavVideoConverter", free_xav_video_converter, ERL_NIF_RT_CREATE, NULL); +static int load(ErlNifEnv *env, void **priv, ERL_NIF_TERM load_info) { + xav_video_converter_resource_type = enif_open_resource_type( + env, NULL, "XavVideoConverter", free_xav_video_converter, ERL_NIF_RT_CREATE, NULL); return 0; } diff --git a/c_src/xav/xav_video_converter.h b/c_src/xav/xav_video_converter.h index f21655e..4de5274 100644 --- a/c_src/xav/xav_video_converter.h +++ b/c_src/xav/xav_video_converter.h @@ -1,8 +1,10 @@ -#include "video_converter.h" #include "utils.h" +#include "video_converter.h" struct XavVideoConverter { - struct VideoConverter *vc; - enum AVPixelFormat out_format; - AVFrame* frame; + struct VideoConverter *vc; + enum AVPixelFormat out_format; + int out_width; + int out_height; + AVFrame *frame; }; \ No newline at end of file diff --git a/lib/decoder.ex b/lib/decoder.ex index 9825ed4..efb79cf 100644 --- a/lib/decoder.ex +++ b/lib/decoder.ex @@ -16,7 +16,9 @@ defmodule Xav.Decoder do @type opts :: [ out_format: Xav.Frame.format(), out_sample_rate: integer(), - out_channels: integer() + out_channels: integer(), + out_width: Xav.Frame.width(), + out_height: Xav.Frame.height() ] @doc """ @@ -59,7 +61,9 @@ defmodule Xav.Decoder do out_format = opts[:out_format] out_sample_rate = opts[:out_sample_rate] || 0 out_channels = opts[:out_channels] || 0 - Xav.Decoder.NIF.new(codec, out_format, out_sample_rate, out_channels) + out_width = opts[:out_width] || -1 + out_height = opts[:out_height] || -1 + Xav.Decoder.NIF.new(codec, out_format, out_sample_rate, out_channels, out_width, out_height) end @doc """ diff --git a/lib/decoder_nif.ex b/lib/decoder_nif.ex index f2468d2..4485c75 100644 --- a/lib/decoder_nif.ex +++ b/lib/decoder_nif.ex @@ -8,7 +8,9 @@ defmodule Xav.Decoder.NIF do :ok = :erlang.load_nif(path, 0) end - def new(_codec, _out_format, _out_sample_rate, _out_channels), do: :erlang.nif_error(:undef) + def new(_codec, _out_format, _out_sample_rate, _out_channels, _out_width, _out_height) do + :erlang.nif_error(:undef) + end def decode(_decoder, _data, _pts, _dts), do: :erlang.nif_error(:undef) diff --git a/lib/frame.ex b/lib/frame.ex index 8b03ba6..572c0a8 100644 --- a/lib/frame.ex +++ b/lib/frame.ex @@ -23,12 +23,15 @@ defmodule Xav.Frame do @type format() :: audio_format() | video_format() + @type width :: non_neg_integer() | nil + @type height :: non_neg_integer() | nil + @type t() :: %__MODULE__{ type: :audio | :video, data: binary(), format: format(), - width: non_neg_integer() | nil, - height: non_neg_integer() | nil, + width: width(), + height: height(), samples: integer() | nil, pts: integer() } diff --git a/lib/video_converter.ex b/lib/video_converter.ex index f5ffc89..ec6a9b8 100644 --- a/lib/video_converter.ex +++ b/lib/video_converter.ex @@ -2,50 +2,70 @@ defmodule Xav.VideoConverter do @moduledoc """ Video samples converter. - Currently it only supports pixel format conversion. + It supports pixel format conversion and/or scaling. """ alias Xav.Frame alias Xav.VideoConverter.NIF - @type t :: %__MODULE__{format: Frame.video_format(), converter: reference()} + @type t :: %__MODULE__{ + converter: reference(), + out_format: Frame.video_format(), + out_width: Frame.width(), + out_height: Frame.height() + } @typedoc """ Type definition for converter options. - * `format` - video format to convert to (`e.g. :rgb24`). + * `out_format` - video format to convert to (`e.g. :rgb24`). + * `out_width` - scale the video frame to this width. + * `out_height` - scale the video frame to this height. + + If `out_width` and `out_height` are both not provided, scaling is not performed. If one of the + dimensions is `nil`, the other will be calculated based on the input dimensions as + to keep the aspect ratio. """ - @type converter_opts() :: [format: Frame.video_format()] + @type converter_opts() :: [ + out_format: Frame.video_format(), + out_width: Frame.width(), + out_height: Frame.height() + ] - @enforce_keys [:format] - defstruct [:format, :converter] + defstruct [:converter, :out_format, :out_width, :out_height] @doc """ Creates a new video converter. """ - @spec new(converter_opts()) :: {:ok, t()} | {:error, any()} + @spec new(converter_opts()) :: t() def new(converter_opts) do - with {:ok, converter} <- NIF.new(converter_opts[:format]) do - {:ok, %__MODULE__{format: converter_opts[:format], converter: converter}} - end - end + opts = Keyword.validate!(converter_opts, [:out_format, :out_width, :out_height]) - @doc """ - Same as `new/1` but raises an exception in case of an error. - """ - @spec new!(converter_opts()) :: t() - def new!(converter_opts) do - case new(converter_opts) do - {:ok, ref} -> ref - {:error, reason} -> raise "Couldn't create a video converter. Reason: #{inspect(reason)}" + if is_nil(opts[:out_format]) and is_nil(opts[:out_width]) and is_nil(opts[:out_height]) do + raise "At least one of `out_format`, `out_width` or `out_height` must be provided" end + + :ok = validate_converter_options(opts) + + converter = NIF.new(opts[:out_format], opts[:out_width] || -1, opts[:out_height] || -1) + + %__MODULE__{ + converter: converter, + out_format: opts[:out_format], + out_width: opts[:out_width], + out_height: opts[:out_height] + } end @doc """ Converts a video frame. """ @spec convert(t(), Frame.t()) :: Frame.t() - def convert(%__MODULE__{format: format}, %Frame{format: format} = frame), do: frame + def convert( + %__MODULE__{out_format: format, out_width: nil, out_height: nil}, + %Frame{format: format} = frame + ), + do: frame def convert(%__MODULE__{converter: converter}, frame) do {data, out_format, width, height, _pts} = @@ -60,4 +80,28 @@ defmodule Xav.VideoConverter do pts: frame.pts } end + + defp validate_converter_options([]), do: :ok + + defp validate_converter_options([{_key, nil} | opts]) do + validate_converter_options(opts) + end + + defp validate_converter_options([{key, value} | _opts]) + when key in [:out_width, :out_height] and not is_integer(value) do + raise %ArgumentError{ + message: "Expected an integer value for #{inspect(key)}, received: #{inspect(value)}" + } + end + + defp validate_converter_options([{key, value} | _opts]) + when key in [:out_width, :out_height] and value < 1 do + raise %ArgumentError{ + message: "Invalid value for #{inspect(key)}, expected a value to be >= 1" + } + end + + defp validate_converter_options([{_key, _value} | opts]) do + validate_converter_options(opts) + end end diff --git a/lib/video_converter_nif.ex b/lib/video_converter_nif.ex index e09051e..9521df1 100644 --- a/lib/video_converter_nif.ex +++ b/lib/video_converter_nif.ex @@ -8,7 +8,7 @@ defmodule Xav.VideoConverter.NIF do :ok = :erlang.load_nif(path, 0) end - def new(_format), do: :erlang.nif_error(:undef) + def new(_format, _width, _height), do: :erlang.nif_error(:undef) def convert(_converter, _frame, _width, _height, _pix_format), do: :erlang.nif_error(:undef) end diff --git a/test/decoder_test.exs b/test/decoder_test.exs index 0011cbd..dd1d643 100644 --- a/test/decoder_test.exs +++ b/test/decoder_test.exs @@ -365,5 +365,14 @@ defmodule Xav.DecoderTest do assert byte_size(frame) == 640 * 480 * 3 end + + test "scale video frame" do + decoder = Xav.Decoder.new(:vp8, out_width: 240, out_height: 180) + + assert {:ok, %Xav.Frame{width: 240, height: 180, pts: 0, data: frame, format: :yuv420p}} = + Xav.Decoder.decode(decoder, @vp8_keyframe) + + assert byte_size(frame) == 240 * 180 * 3 / 2 + end end end diff --git a/test/video_converter_test.exs b/test/video_converter_test.exs index 7e22c2f..42bb0fc 100644 --- a/test/video_converter_test.exs +++ b/test/video_converter_test.exs @@ -1,34 +1,42 @@ defmodule Xav.VideoConverterTest do use ExUnit.Case, async: true - test "new/1" do - assert {:ok, %Xav.VideoConverter{format: :rgb24, converter: converter}} = - Xav.VideoConverter.new(format: :rgb24) + describe "new/1" do + test "new converter" do + assert %Xav.VideoConverter{out_format: :rgb24, converter: converter} = + Xav.VideoConverter.new(out_format: :rgb24) - assert is_reference(converter) - end + assert is_reference(converter) + end + + test "fails when no option is provided" do + assert_raise RuntimeError, fn -> Xav.VideoConverter.new(out_format: nil) end + end - test "new!/1" do - assert %Xav.VideoConverter{} = Xav.VideoConverter.new!(format: :rgb24) - assert_raise ErlangError, fn -> Xav.VideoConverter.new!(format: :rgb) end + test "fails on invalid options" do + assert_raise ArgumentError, fn -> Xav.VideoConverter.new(out_width: 0) end + assert_raise ArgumentError, fn -> Xav.VideoConverter.new(out_height: "15") end + end end describe "convert/2" do setup do - %{converter: Xav.VideoConverter.new!(format: :rgb24)} - end + frame_480p = %Xav.Frame{ + type: :video, + data: File.read!("test/fixtures/video_converter/frame_480x360.yuv"), + format: :yuv420p, + width: 480, + height: 360, + pts: 0 + } - test "convert video format", %{converter: converter} do - assert frame = - Xav.VideoConverter.convert(converter, %Xav.Frame{ - type: :video, - data: File.read!("test/fixtures/video_converter/frame_480x360.yuv"), - format: :yuv420p, - width: 480, - height: 360, - pts: 0 - }) + %{ + converter: Xav.VideoConverter.new(out_format: :rgb24), + frame_480p: frame_480p + } + end + test "convert video format", %{converter: converter, frame_480p: frame_480p} do # reference frame is generated using ffmeg # ffmpeg -f rawvideo -pix_fmt yuv420p -video_size 480x360 -i frame_480x360.yuv -pix_fmt rgb24 ref_frame_480x360.yuv ref_data = File.read!("test/fixtures/video_converter/ref_frame_480x360.rgb") @@ -40,19 +48,11 @@ defmodule Xav.VideoConverterTest do width: 480, height: 360, pts: 0 - } = frame + } = Xav.VideoConverter.convert(converter, frame_480p) end - test "converter re-init on resolution change", %{converter: converter} do - frame1 = %Xav.Frame{ - type: :video, - data: File.read!("test/fixtures/video_converter/frame_480x360.yuv"), - format: :yuv420p, - width: 480, - height: 360 - } - - frame2 = %Xav.Frame{ + test "converter re-init on resolution change", %{converter: converter, frame_480p: frame_480p} do + frame_360p = %Xav.Frame{ type: :video, data: File.read!("test/fixtures/video_converter/frame_360x240.yuv"), format: :yuv420p, @@ -61,13 +61,38 @@ defmodule Xav.VideoConverterTest do } assert %Xav.Frame{format: :rgb24, data: ref_frame1} = - Xav.VideoConverter.convert(converter, frame1) + Xav.VideoConverter.convert(converter, frame_480p) assert %Xav.Frame{format: :rgb24, data: ref_frame2} = - Xav.VideoConverter.convert(converter, frame2) + Xav.VideoConverter.convert(converter, frame_360p) assert byte_size(ref_frame1) == 480 * 360 * 3 assert byte_size(ref_frame2) == 360 * 240 * 3 end + + test "scale video frame", %{frame_480p: frame_480p} do + converter = Xav.VideoConverter.new(out_width: 368) + + assert %Xav.Frame{ + type: :video, + format: :yuv420p, + data: data, + width: 368, + height: 276 + } = Xav.VideoConverter.convert(converter, frame_480p) + + assert byte_size(data) == 368 * 276 * 3 / 2 + end + + test "scale and convert video frame", %{frame_480p: frame_480p} do + converter = Xav.VideoConverter.new(out_width: 360, out_height: 240, out_format: :rgb24) + + assert %Xav.Frame{ + type: :video, + format: :rgb24, + width: 360, + height: 240 + } = Xav.VideoConverter.convert(converter, frame_480p) + end end end