From c43f7105e17c226c21e044f39bda358ce1431541 Mon Sep 17 00:00:00 2001 From: Billal Ghilas <84322223+gBillal@users.noreply.github.com> Date: Mon, 27 Jan 2025 19:14:17 +0100 Subject: [PATCH] Add encoder (h26x) (#29) * Add encoder * fix reviews * Add gop_size and max_b_frames options * add profile option * add tests * improve test --- Makefile | 11 +- c_src/xav/encoder.c | 107 ++++++++++++++ c_src/xav/encoder.h | 30 ++++ c_src/xav/utils.c | 27 +++- c_src/xav/utils.h | 5 +- c_src/xav/xav_decoder.c | 4 +- c_src/xav/xav_encoder.c | 244 ++++++++++++++++++++++++++++++++ c_src/xav/xav_encoder.h | 8 ++ c_src/xav/xav_video_converter.c | 4 +- lib/encoder.ex | 131 +++++++++++++++++ lib/encoder_nif.ex | 16 +++ lib/packet.ex | 19 +++ mix.exs | 1 + mix.lock | 1 + test/encoder_test.exs | 115 +++++++++++++++ 15 files changed, 710 insertions(+), 13 deletions(-) create mode 100644 c_src/xav/encoder.c create mode 100644 c_src/xav/encoder.h create mode 100644 c_src/xav/xav_encoder.c create mode 100644 c_src/xav/xav_encoder.h create mode 100644 lib/encoder.ex create mode 100644 lib/encoder_nif.ex create mode 100644 lib/packet.ex create mode 100644 test/encoder_test.exs diff --git a/Makefile b/Makefile index 852521f..8a2216b 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,7 @@ XAV_DIR = c_src/xav PRIV_DIR = $(MIX_APP_PATH)/priv XAV_DECODER_SO = $(PRIV_DIR)/libxavdecoder.so +XAV_ENCODER_SO = $(PRIV_DIR)/libxavencoder.so XAV_READER_SO = $(PRIV_DIR)/libxavreader.so XAV_VIDEO_CONVERTER_SO = $(PRIV_DIR)/libxavvideoconverter.so @@ -15,6 +16,9 @@ XAV_VIDEO_CONVERTER_SO = $(PRIV_DIR)/libxavvideoconverter.so DECODER_HEADERS = $(XAV_DIR)/xav_decoder.h $(XAV_DIR)/decoder.h $(XAV_DIR)/video_converter.h $(XAV_DIR)/audio_converter.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h DECODER_SOURCES = $(XAV_DIR)/xav_decoder.c $(XAV_DIR)/decoder.c $(XAV_DIR)/video_converter.c $(XAV_DIR)/audio_converter.c $(XAV_DIR)/utils.c +ENCODER_HEADERS = $(XAV_DIR)/xav_encoder.h $(XAV_DIR)/encoder.h $(XAV_DIR)/utils.h 
+ENCODER_SOURCES = $(XAV_DIR)/xav_encoder.c $(XAV_DIR)/encoder.c $(XAV_DIR)/utils.c + READER_HEADERS = $(XAV_DIR)/xav_reader.h $(XAV_DIR)/reader.h $(XAV_DIR)/video_converter.h $(XAV_DIR)/audio_converter.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h READER_SOURCES = $(XAV_DIR)/xav_reader.c $(XAV_DIR)/reader.c $(XAV_DIR)/video_converter.c $(XAV_DIR)/audio_converter.c $(XAV_DIR)/utils.c @@ -42,7 +46,7 @@ ifneq (,$(wildcard /etc/fedora-release)) LFLAGS += $$(pkg-config --libs-only-L libavcodec libswscale libavutil libavformat libavdevice libswresample) endif -all: $(XAV_DECODER_SO) $(XAV_READER_SO) $(XAV_VIDEO_CONVERTER_SO) +all: $(XAV_DECODER_SO) $(XAV_READER_SO) $(XAV_VIDEO_CONVERTER_SO) $(XAV_ENCODER_SO) $(XAV_DECODER_SO): Makefile $(DECODER_SOURCES) $(DECODER_HEADERS) mkdir -p $(PRIV_DIR) @@ -56,6 +60,11 @@ $(XAV_VIDEO_CONVERTER_SO): Makefile $(VIDEO_CONVERTER_SOURCES) $(VIDEO_CONVERTER mkdir -p $(PRIV_DIR) $(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) $(VIDEO_CONVERTER_SOURCES) -o $(XAV_VIDEO_CONVERTER_SO) $(LDFLAGS) + +$(XAV_ENCODER_SO): Makefile $(ENCODER_SOURCES) $(ENCODER_HEADERS) + mkdir -p $(PRIV_DIR) + $(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) $(ENCODER_SOURCES) -o $(XAV_ENCODER_SO) $(LDFLAGS) + format: clang-format -i $(XAV_DIR)/* diff --git a/c_src/xav/encoder.c b/c_src/xav/encoder.c new file mode 100644 index 0000000..757c429 --- /dev/null +++ b/c_src/xav/encoder.c @@ -0,0 +1,112 @@ +#include "encoder.h" + +struct Encoder *encoder_alloc() { + struct Encoder *encoder = XAV_ALLOC(sizeof(struct Encoder)); + encoder->c = NULL; + encoder->codec = NULL; + encoder->num_packets = 0; + encoder->max_num_packets = 8; + encoder->packets = XAV_ALLOC(encoder->max_num_packets * sizeof(AVPacket *)); + + for (int i = 0; i < encoder->max_num_packets; i++) { + encoder->packets[i] = av_packet_alloc(); + } + + return encoder; +} + +int encoder_init(struct Encoder *encoder, struct EncoderConfig *config) { + encoder->codec = avcodec_find_encoder(config->codec); + if (!encoder->codec) { +
return -1; + } + + encoder->c = avcodec_alloc_context3(encoder->codec); + if (!encoder->c) { + return -1; + } + + encoder->c->width = config->width; + encoder->c->height = config->height; + encoder->c->pix_fmt = config->format; + encoder->c->time_base = config->time_base; + + if (config->profile != FF_PROFILE_UNKNOWN) { + encoder->c->profile = config->profile; + } + + if (config->gop_size > 0) { + encoder->c->gop_size = config->gop_size; + } + + if (config->max_b_frames >= 0) { + encoder->c->max_b_frames = config->max_b_frames; + } + + AVDictionary *opts = NULL; + if (config->codec == AV_CODEC_ID_HEVC) { + char x265_params[256] = "log-level=warning"; + if (config->gop_size > 0) { + sprintf(x265_params + strlen(x265_params), ":keyint=%d", config->gop_size); + } + + if (config->max_b_frames >= 0) { + sprintf(x265_params + strlen(x265_params), ":bframes=%d", config->max_b_frames); + } + + av_dict_set(&opts, "x265-params", x265_params, 0); + } + + int ret = avcodec_open2(encoder->c, encoder->codec, &opts); + // avcodec_open2() leaves the entries it did not consume in opts; release them. + av_dict_free(&opts); + return ret; +} + +int encoder_encode(struct Encoder *encoder, AVFrame *frame) { + int ret = avcodec_send_frame(encoder->c, frame); + if (ret < 0) { + return ret; + } + + encoder->num_packets = 0; + + while (1) { + ret = avcodec_receive_packet(encoder->c, encoder->packets[encoder->num_packets]); + if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { + break; + } else if (ret < 0) { + return ret; + } + + if (++encoder->num_packets >= encoder->max_num_packets) { + encoder->max_num_packets *= 2; + encoder->packets = + XAV_REALLOC(encoder->packets, encoder->max_num_packets * sizeof(AVPacket *)); + for (int i = encoder->num_packets; i < encoder->max_num_packets; i++) { + encoder->packets[i] = av_packet_alloc(); + } + } + } + + return 0; +} + +void encoder_free(struct Encoder **encoder) { + if (*encoder != NULL) { + struct Encoder *e = *encoder; + + if (e->c != NULL) { + avcodec_free_context(&e->c); + } + + for (int i = 0; i < e->max_num_packets; i++) { + av_packet_free(&e->packets[i]); +
} + + // The packet pointer array is its own allocation, separate from the struct. + XAV_FREE(e->packets); + XAV_FREE(e); + *encoder = NULL; + } +} \ No newline at end of file diff --git a/c_src/xav/encoder.h b/c_src/xav/encoder.h new file mode 100644 index 0000000..5f540be --- /dev/null +++ b/c_src/xav/encoder.h @@ -0,0 +1,30 @@ +#include "utils.h" +#include + +struct Encoder { + const AVCodec *codec; + AVCodecContext *c; + int num_packets; + int max_num_packets; + AVPacket **packets; +}; + +struct EncoderConfig { + enum AVMediaType media_type; + enum AVCodecID codec; + int width; + int height; + enum AVPixelFormat format; + AVRational time_base; + int gop_size; + int max_b_frames; + int profile; +}; + +struct Encoder *encoder_alloc(); + +int encoder_init(struct Encoder *encoder, struct EncoderConfig *encoder_config); + +int encoder_encode(struct Encoder *encoder, AVFrame *frame); + +void encoder_free(struct Encoder **encoder); \ No newline at end of file diff --git a/c_src/xav/utils.c b/c_src/xav/utils.c index f62b471..b106092 100644 --- a/c_src/xav/utils.c +++ b/c_src/xav/utils.c @@ -1,5 +1,4 @@ #include "utils.h" -#include #include #include #include @@ -20,19 +19,19 @@ ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg) { return enif_raise_exception(env, reason); } -int xav_get_atom(ErlNifEnv *env, ERL_NIF_TERM atom, char **value) { +int xav_nif_get_atom(ErlNifEnv *env, ERL_NIF_TERM term, char **value) { unsigned int atom_len; - if (!enif_get_atom_length(env, atom, &atom_len, ERL_NIF_LATIN1)) { + if (!enif_get_atom_length(env, term, &atom_len, ERL_NIF_LATIN1)) { return 0; } - char *format = (char *)XAV_ALLOC((atom_len * 1) * sizeof(char *)); - if (!enif_get_atom(env, atom, format, atom_len + 1, ERL_NIF_LATIN1)) { - XAV_FREE(format); + char *atom_value = (char *)XAV_ALLOC((atom_len + 1) * sizeof(char)); + if (!enif_get_atom(env, term, atom_value, atom_len + 1, ERL_NIF_LATIN1)) { + XAV_FREE(atom_value); return 0; } - *value = format; + *value = atom_value; return 1; } @@ -66,3 +65,17 @@ ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame) {
ERL_NIF_TERM pts_term = enif_make_int64(env, frame->pts); return enif_make_tuple(env, 5, data_term, format_term, width_term, height_term, pts_term); } + +ERL_NIF_TERM xav_nif_packet_to_term(ErlNifEnv *env, AVPacket *packet) { + ERL_NIF_TERM data_term; + + unsigned char *ptr = enif_make_new_binary(env, packet->size, &data_term); + + memcpy(ptr, packet->data, packet->size); + + ERL_NIF_TERM dts = enif_make_int64(env, packet->dts); + ERL_NIF_TERM pts = enif_make_int64(env, packet->pts); + ERL_NIF_TERM is_keyframe = + enif_make_atom(env, packet->flags & AV_PKT_FLAG_KEY ? "true" : "false"); + return enif_make_tuple(env, 4, data_term, dts, pts, is_keyframe); +} diff --git a/c_src/xav/utils.h b/c_src/xav/utils.h index 094752d..9837a3c 100644 --- a/c_src/xav/utils.h +++ b/c_src/xav/utils.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -14,11 +15,13 @@ #endif #define XAV_ALLOC(X) enif_alloc(X) +#define XAV_REALLOC(X, Y) enif_realloc(X, Y) #define XAV_FREE(X) enif_free(X) ERL_NIF_TERM xav_nif_ok(ErlNifEnv *env, ERL_NIF_TERM data_term); ERL_NIF_TERM xav_nif_error(ErlNifEnv *env, char *reason); ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg); -int xav_get_atom(ErlNifEnv *env, ERL_NIF_TERM atom, char **value); +int xav_nif_get_atom(ErlNifEnv *env, ERL_NIF_TERM term, char **value); ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame); ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples, int out_size, enum AVSampleFormat out_format, int pts); +ERL_NIF_TERM xav_nif_packet_to_term(ErlNifEnv *env, AVPacket *packet); diff --git a/c_src/xav/xav_decoder.c b/c_src/xav/xav_decoder.c index 4fdd462..441bbac 100644 --- a/c_src/xav/xav_decoder.c +++ b/c_src/xav/xav_decoder.c @@ -22,7 +22,7 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { char *out_format = NULL; // resolve codec - if (!xav_get_atom(env, argv[0], &codec)) { + if (!xav_nif_get_atom(env, argv[0], &codec)) {
return xav_nif_raise(env, "failed_to_get_atom"); } @@ -46,7 +46,7 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { } // resolve output format - if (!xav_get_atom(env, argv[1], &out_format)) { + if (!xav_nif_get_atom(env, argv[1], &out_format)) { ret = xav_nif_raise(env, "failed_to_get_atom"); goto clean; } diff --git a/c_src/xav/xav_encoder.c b/c_src/xav/xav_encoder.c new file mode 100644 index 0000000..07b01a8 --- /dev/null +++ b/c_src/xav/xav_encoder.c @@ -0,0 +1,257 @@ +#include "xav_encoder.h" + +ErlNifResourceType *xav_encoder_resource_type; + +static ERL_NIF_TERM packets_to_term(ErlNifEnv *, struct Encoder *); +static int get_profile(enum AVCodecID, const char *); + +ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (argc != 2) { + return xav_nif_raise(env, "invalid_arg_count"); + } + + ERL_NIF_TERM ret; + struct EncoderConfig encoder_config = {0}; + encoder_config.max_b_frames = -1; + encoder_config.profile = FF_PROFILE_UNKNOWN; + + char *codec = NULL, *format = NULL, *profile = NULL; + + ErlNifMapIterator iter; + ERL_NIF_TERM key, value; + char *config_name = NULL; + int err; + + if (!xav_nif_get_atom(env, argv[0], &codec)) { + return xav_nif_raise(env, "failed_to_get_atom"); + } + + if (!enif_is_map(env, argv[1])) { + XAV_FREE(codec); + return xav_nif_raise(env, "failed_to_get_map"); + } + + if (strcmp(codec, "h264") == 0) { + encoder_config.media_type = AVMEDIA_TYPE_VIDEO; + encoder_config.codec = AV_CODEC_ID_H264; + } else if (strcmp(codec, "h265") == 0 || strcmp(codec, "hevc") == 0) { + encoder_config.media_type = AVMEDIA_TYPE_VIDEO; + encoder_config.codec = AV_CODEC_ID_HEVC; + } else { + // The map iterator is not created yet, so the shared cleanup path must not run. + XAV_FREE(codec); + return xav_nif_raise(env, "failed_to_resolve_codec"); + } + + enif_map_iterator_create(env, argv[1], &iter, ERL_NIF_MAP_ITERATOR_FIRST); + + while (enif_map_iterator_get_pair(env, &iter, &key, &value)) { + if (!xav_nif_get_atom(env, key, &config_name)) { + ret = xav_nif_raise(env, "failed_to_get_map_key"); + goto clean;
+ } + + if (strcmp(config_name, "width") == 0) { + err = enif_get_int(env, value, &encoder_config.width); + } else if (strcmp(config_name, "height") == 0) { + err = enif_get_int(env, value, &encoder_config.height); + } else if (strcmp(config_name, "format") == 0) { + err = xav_nif_get_atom(env, value, &format); + } else if (strcmp(config_name, "time_base_num") == 0) { + err = enif_get_int(env, value, &encoder_config.time_base.num); + } else if (strcmp(config_name, "time_base_den") == 0) { + err = enif_get_int(env, value, &encoder_config.time_base.den); + } else if (strcmp(config_name, "gop_size") == 0) { + err = enif_get_int(env, value, &encoder_config.gop_size); + } else if (strcmp(config_name, "max_b_frames") == 0) { + err = enif_get_int(env, value, &encoder_config.max_b_frames); + } else if (strcmp(config_name, "profile") == 0) { + err = xav_nif_get_atom(env, value, &profile); + } else { + ret = xav_nif_raise(env, "unknown_config_key"); + goto clean; + } + + if (!err) { + ret = xav_nif_raise(env, "couldnt_read_value"); + goto clean; + } + + XAV_FREE(config_name); + // Reset so the cleanup path never frees this key a second time. + config_name = NULL; + enif_map_iterator_next(env, &iter); + } + + // format stays NULL when the option map carries no format key. + encoder_config.format = format ? av_get_pix_fmt(format) : AV_PIX_FMT_NONE; + if (encoder_config.format == AV_PIX_FMT_NONE) { + ret = xav_nif_raise(env, "unknown_format"); + goto clean; + } + + if (profile) { + encoder_config.profile = get_profile(encoder_config.codec, profile); + if (encoder_config.profile == FF_PROFILE_UNKNOWN) { + ret = xav_nif_raise(env, "invalid_profile"); + goto clean; + } + } + + struct XavEncoder *xav_encoder = + enif_alloc_resource(xav_encoder_resource_type, sizeof(struct XavEncoder)); + + xav_encoder->frame = av_frame_alloc(); + xav_encoder->encoder = encoder_alloc(); + if (encoder_init(xav_encoder->encoder, &encoder_config) < 0) { + // Drop our only reference so the destructor frees the half-built encoder. + enif_release_resource(xav_encoder); + ret = xav_nif_raise(env, "failed_to_init_encoder"); + goto clean; + } + + ret = enif_make_resource(env, xav_encoder); + enif_release_resource(xav_encoder); + +clean: + if (codec) + XAV_FREE(codec); + if (format) +
XAV_FREE(format); + if (config_name) + XAV_FREE(config_name); + if (profile) + XAV_FREE(profile); + enif_map_iterator_destroy(env, &iter); + + return ret; +} + +ERL_NIF_TERM encode(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (argc != 3) { + return xav_nif_raise(env, "invalid_arg_count"); + } + + struct XavEncoder *xav_encoder; + if (!enif_get_resource(env, argv[0], xav_encoder_resource_type, (void **)&xav_encoder)) { + return xav_nif_raise(env, "invalid_resource"); + } + + ErlNifBinary input; + if (!enif_inspect_binary(env, argv[1], &input)) { + return xav_nif_raise(env, "failed_to_inspect_binary"); + } + + ErlNifSInt64 pts; + if (!enif_get_int64(env, argv[2], &pts)) { + return xav_nif_raise(env, "failed_to_get_int"); + } + + AVFrame *frame = xav_encoder->frame; + frame->width = xav_encoder->encoder->c->width; + frame->height = xav_encoder->encoder->c->height; + frame->format = xav_encoder->encoder->c->pix_fmt; + frame->pts = pts; + + // The binary must hold a whole picture, otherwise the encoder would read past its end. + int ret = av_image_get_buffer_size(frame->format, frame->width, frame->height, 1); + if (ret < 0 || input.size < (size_t)ret) { + return xav_nif_raise(env, "invalid_frame_size"); + } + + ret = av_image_fill_arrays(frame->data, frame->linesize, input.data, frame->format, + frame->width, frame->height, 1); + if (ret < 0) { + return xav_nif_raise(env, "failed_to_fill_arrays"); + } + + ret = encoder_encode(xav_encoder->encoder, frame); + if (ret < 0) { + return xav_nif_raise(env, "failed_to_encode"); + } + + return packets_to_term(env, xav_encoder->encoder); +} + +ERL_NIF_TERM flush(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (argc != 1) { + return xav_nif_raise(env, "invalid_arg_count"); + } + + struct XavEncoder *xav_encoder; + if (!enif_get_resource(env, argv[0], xav_encoder_resource_type, (void **)&xav_encoder)) { + return xav_nif_raise(env, "invalid_resource"); + } + + int ret = encoder_encode(xav_encoder->encoder, NULL); + if (ret < 0) { + return xav_nif_raise(env, "failed_to_encode"); + } + + return packets_to_term(env, xav_encoder->encoder); +} + +void free_xav_encoder(ErlNifEnv *env, void *obj) { + XAV_LOG_DEBUG("Freeing XavEncoder object"); + struct XavEncoder *xav_encoder = (struct
XavEncoder *)obj; + + if (xav_encoder->encoder != NULL) { + encoder_free(&xav_encoder->encoder); + } + + if (xav_encoder->frame != NULL) { + av_frame_free(&xav_encoder->frame); + } +} + +static ERL_NIF_TERM packets_to_term(ErlNifEnv *env, struct Encoder *encoder) { + ERL_NIF_TERM ret; + ERL_NIF_TERM *packets = XAV_ALLOC(sizeof(ERL_NIF_TERM) * encoder->num_packets); + for (int i = 0; i < encoder->num_packets; i++) { + packets[i] = xav_nif_packet_to_term(env, encoder->packets[i]); + } + + ret = enif_make_list_from_array(env, packets, encoder->num_packets); + + for (int i = 0; i < encoder->num_packets; i++) + av_packet_unref(encoder->packets[i]); + XAV_FREE(packets); + + return ret; +} + +static int get_profile(enum AVCodecID codec, const char *profile_name) { + if (codec == AV_CODEC_ID_H264) { + if (strcmp(profile_name, "constrained_baseline") == 0) { + return FF_PROFILE_H264_CONSTRAINED_BASELINE; + } else if (strcmp(profile_name, "baseline") == 0) { + return FF_PROFILE_H264_BASELINE; + } else if (strcmp(profile_name, "main") == 0) { + return FF_PROFILE_H264_MAIN; + } else if (strcmp(profile_name, "high") == 0) { + return FF_PROFILE_H264_HIGH; + } + } + + if (codec == AV_CODEC_ID_HEVC) { + if (strcmp(profile_name, "main") == 0) { + return FF_PROFILE_HEVC_MAIN; + } else if (strcmp(profile_name, "main_10") == 0) { + return FF_PROFILE_HEVC_MAIN_10; + } else if (strcmp(profile_name, "main_still_picture") == 0) { + return FF_PROFILE_HEVC_MAIN_STILL_PICTURE; + } + } + + return FF_PROFILE_UNKNOWN; +} + +static ErlNifFunc xav_funcs[] = {{"new", 2, new}, {"encode", 3, encode}, {"flush", 1, flush}}; + +static int load(ErlNifEnv *env, void **priv, ERL_NIF_TERM load_info) { + xav_encoder_resource_type = + enif_open_resource_type(env, NULL, "XavEncoder", free_xav_encoder, ERL_NIF_RT_CREATE, NULL); + return 0; +} + +ERL_NIF_INIT(Elixir.Xav.Encoder.NIF, xav_funcs, &load, NULL, NULL, NULL); \ No newline at end of file diff --git a/c_src/xav/xav_encoder.h b/c_src/xav/xav_encoder.h 
new file mode 100644 index 0000000..1f4d1d7 --- /dev/null +++ b/c_src/xav/xav_encoder.h @@ -0,0 +1,8 @@ +#include "encoder.h" +#include "utils.h" +#include + +struct XavEncoder { + struct Encoder *encoder; + AVFrame *frame; +}; diff --git a/c_src/xav/xav_video_converter.c b/c_src/xav/xav_video_converter.c index 8f498a5..9092b2b 100644 --- a/c_src/xav/xav_video_converter.c +++ b/c_src/xav/xav_video_converter.c @@ -29,7 +29,7 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { int width, height; char *format = NULL; - if (!xav_get_atom(env, argv[0], &format)) { + if (!xav_nif_get_atom(env, argv[0], &format)) { return xav_nif_raise(env, "failed_to_get_atom"); } @@ -97,7 +97,7 @@ ERL_NIF_TERM convert(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { return xav_nif_raise(env, "failed_to_get_int"); } - if (!xav_get_atom(env, argv[4], &format)) { + if (!xav_nif_get_atom(env, argv[4], &format)) { return xav_nif_raise(env, "failed_to_get_atom"); } diff --git a/lib/encoder.ex b/lib/encoder.ex new file mode 100644 index 0000000..59a34f3 --- /dev/null +++ b/lib/encoder.ex @@ -0,0 +1,131 @@ +defmodule Xav.Encoder do + @moduledoc """ + Audio/Video encoder. + + Currently, it only supports video encoding: + * `h264` + * `h265`/`hevc` + """ + + @type t :: reference() + + @type codec :: :h264 | :h265 | :hevc + @type encoder_options :: Keyword.t() + + @video_codecs [:h264, :h265, :hevc] + + @video_encoder_schema [ + width: [ + type: :pos_integer, + required: true, + doc: "Width of the video samples." + ], + height: [ + type: :pos_integer, + required: true, + doc: "Height of the video samples." + ], + format: [ + type: :atom, + required: true, + doc: "Pixel format of the video samples." + ], + time_base: [ + type: {:tuple, [:pos_integer, :pos_integer]}, + required: true, + doc: """ + Time base of the video stream. + + It is a rational represented as a tuple of two positive integers `{numerator, denominator}`.
+ It represents the number of ticks `denominator` in `numerator` seconds, e.g. `{1, 90000}` represents + 90000 ticks in 1 second. + + It is used for the decoding and presentation timestamps of the video frames. For video frames with constant + frame rate, choose a timebase of `{1, frame_rate}`. + """ + ], + gop_size: [ + type: :pos_integer, + doc: """ + Group of pictures length. + + Determines the interval in which I-Frames (or keyframes) are inserted in + the stream, e.g. a value of 50 means an I-Frame will be inserted at the 1st frame, + the 51st frame, the 101st frame, and so on. + """ + ], + max_b_frames: [ + type: :non_neg_integer, + doc: """ + Maximum number of consecutive B-Frames to insert between non-B-Frames. + + A value of 0 disables the insertion of B-Frames. + """ + ], + profile: [ + type: {:in, [:constrained_baseline, :baseline, :main, :high, :main_10, :main_still_picture]}, + type_doc: "`t:atom/0`", + doc: """ + The encoder's profile. + + A profile defines the capabilities and features an encoder can use to + target specific applications (e.g. `live video`). + + The following profiles are defined: + + | Codec | Profiles | + |-------|----------| + | h264 | constrained_baseline, baseline, main, high | + | h265/hevc | main, main_10, main_still_picture | + """ + ] + ] + + @doc """ + Create a new encoder. + + It accepts the following options:\n#{NimbleOptions.docs(@video_encoder_schema)} + """ + @spec new(codec(), Keyword.t()) :: t() + def new(codec, opts) when codec in @video_codecs do + opts = NimbleOptions.validate!(opts, @video_encoder_schema) + {time_base_num, time_base_den} = opts[:time_base] + + nif_options = + opts + |> Map.new() + |> Map.delete(:time_base) + |> Map.merge(%{time_base_num: time_base_num, time_base_den: time_base_den}) + + Xav.Encoder.NIF.new(codec, nif_options) + end + + @doc """ + Encodes a frame. + + The return value may be an empty list in case the encoder + needs more frames to produce a packet.
+ """ + @spec encode(t(), Xav.Frame.t()) :: [Xav.Packet.t()] + def encode(encoder, frame) do + encoder + |> Xav.Encoder.NIF.encode(frame.data, frame.pts) + |> to_packets() + end + + @doc """ + Flush the encoder. + """ + @spec flush(t()) :: [Xav.Packet.t()] + def flush(encoder) do + encoder + |> Xav.Encoder.NIF.flush() + |> to_packets() + end + + defp to_packets(result) do + Enum.map(result, fn {data, dts, pts, keyframe?} -> + %Xav.Packet{data: data, dts: dts, pts: pts, keyframe?: keyframe?} + end) + end +end diff --git a/lib/encoder_nif.ex b/lib/encoder_nif.ex new file mode 100644 index 0000000..12dc90a --- /dev/null +++ b/lib/encoder_nif.ex @@ -0,0 +1,16 @@ +defmodule Xav.Encoder.NIF do + @moduledoc false + + @on_load :__on_load__ + + def __on_load__ do + path = :filename.join(:code.priv_dir(:xav), ~c"libxavencoder") + :ok = :erlang.load_nif(path, 0) + end + + def new(_codec, _params), do: :erlang.nif_error(:undef) + + def encode(_encoder, _data, _pts), do: :erlang.nif_error(:undef) + + def flush(_encoder), do: :erlang.nif_error(:undef) +end diff --git a/lib/packet.ex b/lib/packet.ex new file mode 100644 index 0000000..b1daaca --- /dev/null +++ b/lib/packet.ex @@ -0,0 +1,19 @@ +defmodule Xav.Packet do + @moduledoc """ + A module representing an audio/video compressed data. + """ + + @type t :: %__MODULE__{ + data: binary(), + dts: integer(), + pts: integer(), + keyframe?: boolean() + } + + defstruct [:data, :dts, :pts, :keyframe?] 
+ + @spec new(Enumerable.t()) :: t() + def new(opts) do + struct!(%__MODULE__{}, opts) + end +end diff --git a/mix.exs b/mix.exs index 2015d3e..02ffc9a 100644 --- a/mix.exs +++ b/mix.exs @@ -56,6 +56,7 @@ defmodule Xav.MixProject do [ {:nx, "~> 0.7", optional: true}, {:elixir_make, "~> 0.7", runtime: false}, + {:nimble_options, "~> 1.0"}, # dev/test # bumblebee and exla for testing speech to text diff --git a/mix.lock b/mix.lock index 44b7df6..46e2f91 100644 --- a/mix.lock +++ b/mix.lock @@ -18,6 +18,7 @@ "makeup": {:hex, :makeup, "1.1.2", "9ba8837913bdf757787e71c1581c21f9d2455f4dd04cfca785c70bbfff1a76a3", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cce1566b81fbcbd21eca8ffe808f33b221f9eee2cbc7a1706fc3da9ff18e6cac"}, "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, "makeup_erlang": {:hex, :makeup_erlang, "1.0.1", "c7f58c120b2b5aa5fd80d540a89fdf866ed42f1f3994e4fe189abebeab610839", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "8a89a1eeccc2d798d6ea15496a6e4870b75e014d1af514b1b71fa33134f57814"}, + "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", 
"af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, "nx": {:hex, :nx, "0.9.1", "b5296f178d24ded118d5fd5c3977bb65c7f6ad8113eff4cb1401ac1770eb837a", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "f67ca8fcf09f73000e9a59a19f93ad8e0e581f4993e008527a4a6f280c71c467"}, diff --git a/test/encoder_test.exs b/test/encoder_test.exs new file mode 100644 index 0000000..7c9cdfc --- /dev/null +++ b/test/encoder_test.exs @@ -0,0 +1,115 @@ +defmodule Xav.EncoderTest do + use ExUnit.Case, async: true + + alias NimbleOptions.ValidationError + + describe "new/2" do + test "new encoder" do + assert encoder = + Xav.Encoder.new(:h264, + width: 180, + height: 160, + format: :yuv420p, + time_base: {1, 90_000} + ) + + assert is_reference(encoder) + end + + test "raises on invalid encoder" do + assert_raise FunctionClauseError, fn -> Xav.Encoder.new(:h263, []) end + end + + test "raises on invalid options" do + assert_raise ValidationError, fn -> Xav.Encoder.new(:h264, width: 180) end + + assert_raise ValidationError, fn -> + Xav.Encoder.new(:hevc, width: 360, height: -4, format: :yuv420p, time_base: {1, 90_000}) + end + end + end + + describe "encode/1" do + setup do + frame = %Xav.Frame{ + type: :video, + data: File.read!("test/fixtures/video_converter/frame_360x240.yuv"), + format: :yuv420p, + width: 360, + height: 240, + pts: 0 + } + + %{frame: frame} + end + + test "encode a frame", %{frame: frame} do + encoder = + Xav.Encoder.new(:h264, + width: 360, + height: 240, + format: :yuv420p, + time_base: {1, 25} + ) + + assert [] = Xav.Encoder.encode(encoder, frame) + + assert [ + %Xav.Packet{ + data: data, + dts: 0, + pts: 0, + keyframe?: true + } + ] = Xav.Encoder.flush(encoder) + + assert byte_size(data) > 0 + end + + test "encode multiple frames", %{frame: frame} do + encoder = + Xav.Encoder.new(:h264, + width: 360, + height: 240, + format: 
:yuv420p, + time_base: {1, 25}, + gop_size: 1 + ) + + packets = + Xav.Encoder.encode(encoder, frame) ++ + Xav.Encoder.encode(encoder, %{frame | pts: 1}) ++ + Xav.Encoder.encode(encoder, %{frame | pts: 2}) ++ Xav.Encoder.flush(encoder) + + assert length(packets) == 3 + assert Enum.all?(packets, & &1.keyframe?) + end + + test "no bframes inserted", %{frame: frame} do + encoder = + Xav.Encoder.new(:hevc, + width: 360, + height: 240, + format: :yuv420p, + time_base: {1, 25}, + max_b_frames: 0 + ) + + packets = + Stream.iterate(frame, fn frame -> %{frame | pts: frame.pts + 1} end) + |> Stream.take(20) + |> Stream.transform( + fn -> encoder end, + fn frame, encoder -> + {Xav.Encoder.encode(encoder, frame), encoder} + end, + fn encoder -> {Xav.Encoder.flush(encoder), encoder} end, + fn _encoder -> :ok end + ) + |> Enum.to_list() + + assert length(packets) == 20 + assert Enum.all?(packets, &(&1.dts == &1.pts)) + end + end +end