Skip to content

Commit

Permalink
Allow to specify output pixel format (#24)
Browse files Browse the repository at this point in the history
  • Loading branch information
gBillal authored Jan 3, 2025
1 parent e515ec8 commit 13fc3b0
Show file tree
Hide file tree
Showing 11 changed files with 90 additions and 39 deletions.
12 changes: 10 additions & 2 deletions c_src/xav/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ struct Decoder *decoder_alloc() {

decoder->codec = NULL;
decoder->c = NULL;
decoder->out_format = AV_PIX_FMT_NONE;

return decoder;
}

int decoder_init(struct Decoder *decoder, const char *codec) {
int decoder_init(struct Decoder *decoder, const char *codec, const char* out_format) {
if (strcmp(codec, "opus") == 0) {
decoder->media_type = AVMEDIA_TYPE_AUDIO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_OPUS);
Expand All @@ -25,7 +26,7 @@ int decoder_init(struct Decoder *decoder, const char *codec) {
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_H264);
} else if (strcmp(codec, "h265") == 0) {
decoder->media_type = AVMEDIA_TYPE_VIDEO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_H265);
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_HEVC);
} else {
return -1;
}
Expand All @@ -34,6 +35,13 @@ int decoder_init(struct Decoder *decoder, const char *codec) {
return -1;
}

if(decoder->media_type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) {
decoder->out_format = av_get_pix_fmt(out_format);
if (decoder->out_format == AV_PIX_FMT_NONE) {
return -1;
}
}

decoder->c = avcodec_alloc_context3(decoder->codec);
if (!decoder->c) {
return -1;
Expand Down
3 changes: 2 additions & 1 deletion c_src/xav/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

struct Decoder {
enum AVMediaType media_type;
enum AVPixelFormat out_format;
AVFrame *frame;
AVPacket *pkt;
const AVCodec *codec;
Expand All @@ -16,7 +17,7 @@ struct Decoder {

struct Decoder *decoder_alloc();

int decoder_init(struct Decoder *decoder, const char *codec);
int decoder_init(struct Decoder *decoder, const char *codec, const char* out_format);

int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame);

Expand Down
14 changes: 9 additions & 5 deletions c_src/xav/utils.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "utils.h"
#include <libavutil/mathematics.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <stdint.h>

Expand Down Expand Up @@ -33,13 +34,16 @@ ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int
return enif_make_tuple(env, 4, data_term, format_term, samples_term, pts_term);
}

ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame, uint8_t *data[4],
int linesize[4], const char *format_name) {
ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame) {
ERL_NIF_TERM data_term;
unsigned char *ptr = enif_make_new_binary(env, linesize[0] * frame->height, &data_term);
memcpy(ptr, data[0], linesize[0] * frame->height);

ERL_NIF_TERM format_term = enif_make_atom(env, format_name);
int payload_size = av_image_get_buffer_size(frame->format, frame->width, frame->height, 1);
unsigned char *ptr = enif_make_new_binary(env, payload_size, &data_term);

av_image_copy_to_buffer(ptr, payload_size, (const uint8_t *const *)frame->data,
(const int*)frame->linesize, frame->format, frame->width, frame->height, 1);

ERL_NIF_TERM format_term = enif_make_atom(env, av_get_pix_fmt_name(frame->format));
ERL_NIF_TERM height_term = enif_make_int(env, frame->height);
ERL_NIF_TERM width_term = enif_make_int(env, frame->width);
ERL_NIF_TERM pts_term = enif_make_int64(env, frame->pts);
Expand Down
3 changes: 1 addition & 2 deletions c_src/xav/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
ERL_NIF_TERM xav_nif_ok(ErlNifEnv *env, ERL_NIF_TERM data_term);
ERL_NIF_TERM xav_nif_error(ErlNifEnv *env, char *reason);
ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg);
ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame, uint8_t *out_data[4],
int out_linesize[4], const char *out_format);
ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame);
ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples,
int out_size, const char *out_format, int pts);
29 changes: 21 additions & 8 deletions c_src/xav/video_converter.c
Original file line number Diff line number Diff line change
@@ -1,30 +1,43 @@
#include "video_converter.h"

int video_converter_convert(AVFrame *src_frame, uint8_t *out_data[], int out_linesize[]) {
int video_converter_convert(AVFrame *src_frame, AVFrame **dst_frame, enum AVPixelFormat out_format) {
int ret;

*dst_frame = av_frame_alloc();
if (!*dst_frame) {
return -1;
}

(*dst_frame)->width = src_frame->width;
(*dst_frame)->height = src_frame->height;
(*dst_frame)->format = out_format;
(*dst_frame)->pts = src_frame->pts;

ret = av_frame_get_buffer(*dst_frame, 0);
if (ret < 0) {
return ret;
}

struct SwsContext *sws_ctx =
sws_getContext(src_frame->width, src_frame->height, src_frame->format, src_frame->width,
src_frame->height, AV_PIX_FMT_RGB24, SWS_BILINEAR, NULL, NULL, NULL);

ret = av_image_alloc(out_data, out_linesize, src_frame->width, src_frame->height,
AV_PIX_FMT_RGB24, 1);
src_frame->height, out_format, SWS_BILINEAR, NULL, NULL, NULL);

if (ret < 0) {
return ret;
}


// is this (const uint8_t * const*) cast really correct?
ret = sws_scale(sws_ctx, (const uint8_t *const *)src_frame->data, src_frame->linesize, 0,
src_frame->height, out_data, out_linesize);
src_frame->height, (*dst_frame)->data, (*dst_frame)->linesize);

if (ret < 0) {
av_freep(&out_data[0]);
av_frame_free(dst_frame);
sws_freeContext(sws_ctx);
return ret;
}

sws_freeContext(sws_ctx);

return ret;
}
}
2 changes: 1 addition & 1 deletion c_src/xav/video_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
#include <libswscale/swscale.h>
#include <stdint.h>

int video_converter_convert(AVFrame *src_frame, uint8_t *out_data[4], int out_linesize[4]);
int video_converter_convert(AVFrame *src_frame, AVFrame **dst_frame, enum AVPixelFormat out_format);
16 changes: 10 additions & 6 deletions c_src/xav/xav_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
return xav_nif_raise(env, "failed_to_allocate_decoder");
}

if (decoder_init(xav_decoder->decoder, codec) != 0) {
if (decoder_init(xav_decoder->decoder, codec, xav_decoder->out_format) != 0) {
return xav_nif_raise(env, "failed_to_init_decoder");
}

Expand All @@ -79,17 +79,21 @@ ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame* fr
if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_VIDEO) {
XAV_LOG_DEBUG("Converting video to RGB");

uint8_t *out_data[4];
int out_linesize[4];
int out_pix_fmt = xav_decoder->decoder->out_format;

ret = video_converter_convert(frame, out_data, out_linesize);
if (out_pix_fmt == AV_PIX_FMT_NONE) {
return xav_nif_video_frame_to_term(env, frame);
}

AVFrame *dst_frame;
ret = video_converter_convert(frame, &dst_frame, out_pix_fmt);
if (ret <= 0) {
return xav_nif_raise(env, "failed_to_decode");
}

frame_term = xav_nif_video_frame_to_term(env, frame, out_data, out_linesize, "rgb");
frame_term = xav_nif_video_frame_to_term(env, dst_frame);

av_freep(&out_data[0]);
av_frame_free(&dst_frame);
} else if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_AUDIO) {
XAV_LOG_DEBUG("Converting audio to desired out format");

Expand Down
11 changes: 4 additions & 7 deletions c_src/xav/xav_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,15 @@ ERL_NIF_TERM next_frame(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
if (xav_reader->reader->media_type == AVMEDIA_TYPE_VIDEO) {
XAV_LOG_DEBUG("Converting video to RGB");

uint8_t *out_data[4];
int out_linesize[4];

ret = video_converter_convert(xav_reader->reader->frame, out_data, out_linesize);
AVFrame *dst_frame;
ret = video_converter_convert(xav_reader->reader->frame, &dst_frame, AV_PIX_FMT_RGB24);
if (ret <= 0) {
return xav_nif_raise(env, "failed_to_read");
}

frame_term =
xav_nif_video_frame_to_term(env, xav_reader->reader->frame, out_data, out_linesize, "rgb");
frame_term = xav_nif_video_frame_to_term(env, dst_frame);

av_freep(&out_data[0]);
av_frame_free(&dst_frame);
} else if (xav_reader->reader->media_type == AVMEDIA_TYPE_AUDIO) {
XAV_LOG_DEBUG("Converting audio to desired out format");

Expand Down
7 changes: 5 additions & 2 deletions lib/decoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,11 @@ defmodule Xav.Decoder do
[out_format: :f32]
```
Video frames are always returned in RGB format.
This setting cannot be changed.
or the video pixel format:
```elixir
[out_format: :rgb24]
```
Audio samples are always in the packed form -
samples from different channels are interleaved in the same, single binary:
Expand Down
10 changes: 8 additions & 2 deletions lib/frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@ defmodule Xav.Frame do
@typedoc """
Possible video frame formats.
Currently, only RGB is supported.
All `ffmpeg` pixel formats are accepted. For a complete list, run:
```sh
ffmpeg -pix_fmts
```
An example of a pixel format is `:rgb24`.
"""
@type video_format() :: :rgb
@type video_format() :: atom()

@type format() :: audio_format() | video_format()

Expand Down
22 changes: 19 additions & 3 deletions test/decoder_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,11 @@ defmodule Xav.DecoderTest do
142, 204, 5, 106, 217, 175, 162, 62, 128, 161, 69, 136, 234, 30, 43, 165, 152,
104, 143>>

# Use ffmpeg to extract the first frame of the video
# ffmpeg -i sample_video.mp4 -c:v copy -f h264 -vframes 1 sample_h264.h264
@h264_frame File.read!("test/fixtures/decoder/sample_h264.h264")
# You can do the same for hevc given that the mp4 file contains a hevc stream
# ffmpeg -i sample_video.mp4 -c:v copy -f hevc -vframes 1 sample_h265.h265
@h265_frame File.read!("test/fixtures/decoder/sample_h265.h265")

test "new/0" do
Expand Down Expand Up @@ -323,8 +327,10 @@ defmodule Xav.DecoderTest do
test "video keyframe" do
decoder = Xav.Decoder.new(:vp8)

assert {:ok, %Xav.Frame{width: 640, height: 480, pts: 0, format: :rgb}} =
assert {:ok, %Xav.Frame{width: 640, height: 480, pts: 0, data: frame, format: :yuv420p}} =
Xav.Decoder.decode(decoder, @vp8_keyframe)

assert byte_size(frame) == 640 * 480 * 3 / 2
end

test "video without prior keyframe" do
Expand All @@ -338,7 +344,7 @@ defmodule Xav.DecoderTest do

assert :ok = Xav.Decoder.decode(decoder, @h264_frame)

assert {:ok, [%Xav.Frame{width: 1280, height: 720, pts: 0, format: :rgb}]} =
assert {:ok, [%Xav.Frame{width: 1280, height: 720, pts: 0, format: :yuv420p}]} =
Xav.Decoder.flush(decoder)
end

Expand All @@ -347,8 +353,18 @@ defmodule Xav.DecoderTest do

assert :ok = Xav.Decoder.decode(decoder, @h265_frame)

assert {:ok, [%Xav.Frame{width: 1920, height: 1080, pts: 0, format: :rgb}]} =
assert {:ok, [%Xav.Frame{width: 1920, height: 1080, pts: 0, format: :yuv420p}]} =
Xav.Decoder.flush(decoder)
end

test "convert video frame" do
decoder = Xav.Decoder.new(:vp8, out_format: :rgb24)

assert {:ok, %Xav.Frame{width: 640, height: 480, pts: 0, data: frame, format: :rgb24}} =
Xav.Decoder.decode(decoder, @vp8_keyframe)

assert byte_size(frame) == 640 * 480 * 3
end
end
end
end

0 comments on commit 13fc3b0

Please sign in to comment.