Skip to content

Commit

Permalink
Refactor out format resolving.
Browse files Browse the repository at this point in the history
This commit moves output-format resolution into xav_decoder.c.
This way we can remove out_format from the Decoder struct.
  • Loading branch information
mickel8 committed Jan 3, 2025
1 parent 13fc3b0 commit 49848fd
Show file tree
Hide file tree
Showing 12 changed files with 91 additions and 87 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ end
Decode

```elixir
decoder = Xav.Decoder.new(:vp8)
decoder = Xav.Decoder.new(:vp8, out_format: :rgb24)
{:ok, %Xav.Frame{} = frame} = Xav.Decoder.decode(decoder, <<"somebinary">>)
```

Expand All @@ -52,7 +52,7 @@ Kino.Image.new(tensor)
Read from a camera:

```elixir
r = Xav.Reader.new!("/dev/video0", device?: true)
r = Xav.Reader.new!("/dev/video0", device?: true, out_format: :rgb24)
{:ok, %Xav.Frame{} = frame} = Xav.Reader.next_frame(r)
tensor = Xav.Frame.to_nx(frame)
Kino.Image.new(tensor)
Expand Down
29 changes: 4 additions & 25 deletions c_src/xav/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,39 +9,18 @@ struct Decoder *decoder_alloc() {

decoder->codec = NULL;
decoder->c = NULL;
decoder->out_format = AV_PIX_FMT_NONE;

return decoder;
}

int decoder_init(struct Decoder *decoder, const char *codec, const char* out_format) {
if (strcmp(codec, "opus") == 0) {
decoder->media_type = AVMEDIA_TYPE_AUDIO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_OPUS);
} else if (strcmp(codec, "vp8") == 0) {
decoder->media_type = AVMEDIA_TYPE_VIDEO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_VP8);
} else if (strcmp(codec, "h264") == 0) {
decoder->media_type = AVMEDIA_TYPE_VIDEO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_H264);
} else if (strcmp(codec, "h265") == 0) {
decoder->media_type = AVMEDIA_TYPE_VIDEO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_HEVC);
} else {
return -1;
}
int decoder_init(struct Decoder *decoder, enum AVMediaType media_type, enum AVCodecID codec_id) {
decoder->media_type = media_type;
decoder->codec = avcodec_find_decoder(codec_id);

if (!decoder->codec) {
return -1;
}

if(decoder->media_type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) {
decoder->out_format = av_get_pix_fmt(out_format);
if (decoder->out_format == AV_PIX_FMT_NONE) {
return -1;
}
}

decoder->c = avcodec_alloc_context3(decoder->codec);
if (!decoder->c) {
return -1;
Expand Down Expand Up @@ -74,7 +53,7 @@ int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame) {
return avcodec_receive_frame(decoder->c, frame);
}

int decoder_flush(struct Decoder *decoder, AVFrame **frames, int *frames_count) {
int decoder_flush(struct Decoder *decoder, AVFrame **frames, int *frames_count) {
int ret = avcodec_send_packet(decoder->c, NULL);
if (ret != 0) {
return ret;
Expand Down
3 changes: 1 addition & 2 deletions c_src/xav/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

struct Decoder {
enum AVMediaType media_type;
enum AVPixelFormat out_format;
AVFrame *frame;
AVPacket *pkt;
const AVCodec *codec;
Expand All @@ -17,7 +16,7 @@ struct Decoder {

struct Decoder *decoder_alloc();

int decoder_init(struct Decoder *decoder, const char *codec, const char* out_format);
int decoder_init(struct Decoder *decoder, enum AVMediaType media_type, enum AVCodecID codec_id);

int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame);

Expand Down
13 changes: 7 additions & 6 deletions c_src/xav/utils.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "utils.h"
#include <libavutil/mathematics.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#include <stdint.h>

Expand All @@ -21,14 +21,14 @@ ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg) {
}

ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples,
int out_size, const char *out_format, int pts) {
int out_size, enum AVSampleFormat out_format, int pts) {
ERL_NIF_TERM data_term;

unsigned char *ptr = enif_make_new_binary(env, out_size, &data_term);
memcpy(ptr, out_data[0], out_size);

ERL_NIF_TERM samples_term = enif_make_int(env, out_samples);
ERL_NIF_TERM format_term = enif_make_atom(env, out_format);
ERL_NIF_TERM format_term = enif_make_atom(env, av_get_sample_fmt_name(out_format));
ERL_NIF_TERM pts_term = enif_make_int(env, pts);

return enif_make_tuple(env, 4, data_term, format_term, samples_term, pts_term);
Expand All @@ -39,9 +39,10 @@ ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame) {

int payload_size = av_image_get_buffer_size(frame->format, frame->width, frame->height, 1);
unsigned char *ptr = enif_make_new_binary(env, payload_size, &data_term);

av_image_copy_to_buffer(ptr, payload_size, (const uint8_t *const *)frame->data,
(const int*)frame->linesize, frame->format, frame->width, frame->height, 1);

av_image_copy_to_buffer(ptr, payload_size, (const uint8_t *const *)frame->data,
(const int *)frame->linesize, frame->format, frame->width, frame->height,
1);

ERL_NIF_TERM format_term = enif_make_atom(env, av_get_pix_fmt_name(frame->format));
ERL_NIF_TERM height_term = enif_make_int(env, frame->height);
Expand Down
2 changes: 1 addition & 1 deletion c_src/xav/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ ERL_NIF_TERM xav_nif_error(ErlNifEnv *env, char *reason);
ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg);
ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame);
ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples,
int out_size, const char *out_format, int pts);
int out_size, enum AVSampleFormat out_format, int pts);
4 changes: 2 additions & 2 deletions c_src/xav/video_converter.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "video_converter.h"

int video_converter_convert(AVFrame *src_frame, AVFrame **dst_frame, enum AVPixelFormat out_format) {
int video_converter_convert(AVFrame *src_frame, AVFrame **dst_frame,
enum AVPixelFormat out_format) {
int ret;

*dst_frame = av_frame_alloc();
Expand All @@ -26,7 +27,6 @@ int video_converter_convert(AVFrame *src_frame, AVFrame **dst_frame, enum AVPixe
return ret;
}


// is this (const uint8_t * const*) cast really correct?
ret = sws_scale(sws_ctx, (const uint8_t *const *)src_frame->data, src_frame->linesize, 0,
src_frame->height, (*dst_frame)->data, (*dst_frame)->linesize);
Expand Down
90 changes: 53 additions & 37 deletions c_src/xav/xav_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,61 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
return xav_nif_raise(env, "invalid_arg_count");
}

// resolve codec
unsigned int codec_len;
if (!enif_get_atom_length(env, argv[0], &codec_len, ERL_NIF_LATIN1)) {
return xav_nif_raise(env, "failed_to_get_atom_length");
}

char *codec = (char *)XAV_ALLOC((codec_len + 1) * sizeof(char *));

if (enif_get_atom(env, argv[0], codec, codec_len + 1, ERL_NIF_LATIN1) == 0) {
return xav_nif_raise(env, "failed_to_get_atom");
}

enum AVMediaType media_type;
enum AVCodecID codec_id;
if (strcmp(codec, "opus") == 0) {
media_type = AVMEDIA_TYPE_AUDIO;
codec_id = AV_CODEC_ID_OPUS;
} else if (strcmp(codec, "vp8") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_VP8;
} else if (strcmp(codec, "h264") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_H264;
} else if (strcmp(codec, "h265") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_HEVC;
} else {
return xav_nif_raise(env, "failed_to_resolve_codec");
}

// resolve output format
unsigned int out_format_len;
if (!enif_get_atom_length(env, argv[1], &out_format_len, ERL_NIF_LATIN1)) {
return xav_nif_raise(env, "failed_to_get_atom_length");
}

char *out_format = (char *)XAV_ALLOC((out_format_len + 1) * sizeof(char *));

if (enif_get_atom(env, argv[1], out_format, out_format_len + 1, ERL_NIF_LATIN1) == 0) {
return xav_nif_raise(env, "failed_to_get_atom");
}

enum AVPixelFormat out_video_fmt = AV_PIX_FMT_NONE;
enum AVSampleFormat out_audo_fmt = AV_SAMPLE_FMT_NONE;
if (media_type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) {
out_video_fmt = av_get_pix_fmt(out_format);
if (out_video_fmt == AV_PIX_FMT_NONE) {
return xav_nif_raise(env, "unknown_out_format");
}
} else if (media_type == AVMEDIA_TYPE_AUDIO && strcmp(out_format, "nil") != 0) {
out_audo_fmt = av_get_sample_fmt(out_format);
if (out_audo_fmt == AV_SAMPLE_FMT_NONE) {
return xav_nif_raise(env, "unknown_out_format");
}
}

// resolve other params
int out_sample_rate;
if (!enif_get_int(env, argv[2], &out_sample_rate)) {
return xav_nif_raise(env, "invalid_out_sample_rate");
Expand All @@ -53,7 +86,8 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
enif_alloc_resource(xav_decoder_resource_type, sizeof(struct XavDecoder));
xav_decoder->decoder = NULL;
xav_decoder->ac = NULL;
xav_decoder->out_format = out_format;
xav_decoder->out_audio_fmt = out_audo_fmt;
xav_decoder->out_video_fmt = out_video_fmt;
xav_decoder->out_sample_rate = out_sample_rate;
xav_decoder->out_channels = out_channels;

Expand All @@ -62,31 +96,31 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
return xav_nif_raise(env, "failed_to_allocate_decoder");
}

if (decoder_init(xav_decoder->decoder, codec, xav_decoder->out_format) != 0) {
if (decoder_init(xav_decoder->decoder, media_type, codec_id) != 0) {
return xav_nif_raise(env, "failed_to_init_decoder");
}

ERL_NIF_TERM decoder_term = enif_make_resource(env, xav_decoder);
enif_release_resource(xav_decoder);

XAV_FREE(out_format);

return decoder_term;
}

ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame* frame) {
ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame *frame) {
ERL_NIF_TERM frame_term;
int ret;

if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_VIDEO) {
XAV_LOG_DEBUG("Converting video to RGB");

int out_pix_fmt = xav_decoder->decoder->out_format;

if (out_pix_fmt == AV_PIX_FMT_NONE) {
if (xav_decoder->out_video_fmt == AV_PIX_FMT_NONE) {
return xav_nif_video_frame_to_term(env, frame);
}

AVFrame *dst_frame;
ret = video_converter_convert(frame, &dst_frame, out_pix_fmt);
ret = video_converter_convert(frame, &dst_frame, xav_decoder->out_video_fmt);
if (ret <= 0) {
return xav_nif_raise(env, "failed_to_decode");
}
Expand All @@ -104,7 +138,7 @@ ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame* fr
if (xav_decoder->ac == NULL) {
ret = init_audio_converter(xav_decoder);
if (ret < 0) {
return xav_nif_raise(env, "failed_to_init_converter");;
return xav_nif_raise(env, "failed_to_init_converter");
}
}

Expand All @@ -113,15 +147,8 @@ ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame* fr
return xav_nif_raise(env, "failed_to_decode");
}

const char *out_format = av_get_sample_fmt_name(xav_decoder->ac->out_sample_fmt);

if (strcmp(out_format, "flt") == 0) {
out_format = "f32";
} else if (strcmp(out_format, "dbl") == 0) {
out_format = "f64";
}

frame_term = xav_nif_audio_frame_to_term(env, out_data, out_samples, out_size, out_format, frame->pts);
frame_term = xav_nif_audio_frame_to_term(env, out_data, out_samples, out_size,
xav_decoder->out_audio_fmt, frame->pts);

av_freep(&out_data[0]);
}
Expand Down Expand Up @@ -229,23 +256,12 @@ static int init_audio_converter(struct XavDecoder *xav_decoder) {
out_sample_rate = xav_decoder->out_sample_rate;
}

enum AVSampleFormat out_sample_fmt;
if (strcmp(xav_decoder->out_format, "u8") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_U8;
} else if (strcmp(xav_decoder->out_format, "s16") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_S16;
} else if (strcmp(xav_decoder->out_format, "s32") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_S32;
} else if (strcmp(xav_decoder->out_format, "s64") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_S64;
} else if (strcmp(xav_decoder->out_format, "f32") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_FLT;
} else if (strcmp(xav_decoder->out_format, "f64") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_DBL;
} else if (strcmp(xav_decoder->out_format, "nil") == 0) {
out_sample_fmt = av_get_alt_sample_fmt(xav_decoder->decoder->c->sample_fmt, 0);
} else {
return -1;
// If user didn't request any specific format,
// just take the original format but in the packed form.
// We need to call this function here, as in the decoder_init we don't know
// what is the sample_fmt yet.
if (xav_decoder->out_audio_fmt == AV_SAMPLE_FMT_NONE) {
xav_decoder->out_audio_fmt = av_get_alt_sample_fmt(xav_decoder->decoder->c->sample_fmt, 0);
}

struct ChannelLayout in_chlayout, out_chlayout;
Expand All @@ -267,7 +283,7 @@ static int init_audio_converter(struct XavDecoder *xav_decoder) {

return audio_converter_init(xav_decoder->ac, in_chlayout, xav_decoder->decoder->c->sample_rate,
xav_decoder->decoder->c->sample_fmt, out_chlayout, out_sample_rate,
out_sample_fmt);
xav_decoder->out_audio_fmt);
}

void free_xav_decoder(ErlNifEnv *env, void *obj) {
Expand Down
5 changes: 4 additions & 1 deletion c_src/xav/xav_decoder.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#include "audio_converter.h"
#include "decoder.h"

#include <libavutil/pixfmt.h>

struct XavDecoder {
struct Decoder *decoder;
struct AudioConverter *ac;
char *out_format;
enum AVPixelFormat out_video_fmt;
enum AVSampleFormat out_audio_fmt;
int out_sample_rate;
int out_channels;
};
13 changes: 3 additions & 10 deletions c_src/xav/xav_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,16 +169,9 @@ ERL_NIF_TERM next_frame(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
return xav_nif_raise(env, "failed_to_read");
}

const char *out_format = av_get_sample_fmt_name(xav_reader->ac->out_sample_fmt);

if (strcmp(out_format, "flt") == 0) {
out_format = "f32";
} else if (strcmp(out_format, "dbl") == 0) {
out_format = "f64";
}

frame_term = xav_nif_audio_frame_to_term(env, out_data, out_samples, out_size, out_format,
xav_reader->reader->frame->pts);
frame_term =
xav_nif_audio_frame_to_term(env, out_data, out_samples, out_size,
xav_reader->ac->out_sample_fmt, xav_reader->reader->frame->pts);
av_freep(&out_data[0]);
}

Expand Down
7 changes: 7 additions & 0 deletions lib/decoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,15 @@ defmodule Xav.Decoder do
:ok

{:ok, {data, format, width, height, pts}} ->
format = normalize_format(format)
{:ok, Xav.Frame.new(data, format, width, height, pts)}

# Sometimes, audio converter might not return data immediately.
{:ok, {"", _format, _samples, _pts}} ->
:ok

{:ok, {data, format, samples, pts}} ->
format = normalize_format(format)
{:ok, Xav.Frame.new(data, format, samples, pts)}

{:error, _reason} = error ->
Expand Down Expand Up @@ -123,4 +125,9 @@ defmodule Xav.Decoder do
{:error, reason} -> raise "Failed to flush decoder: #{inspect(reason)}"
end
end

# Report sample formats under the names Nx uses: :flt becomes :f32 and
# :dbl becomes :f64; any other format is passed through unchanged.
defp normalize_format(format) do
  case format do
    :flt -> :f32
    :dbl -> :f64
    _ -> format
  end
end
end
Loading

0 comments on commit 49848fd

Please sign in to comment.