Skip to content

Commit

Permalink
Move audio/video converter out of reader/decoder
Browse files Browse the repository at this point in the history
  • Loading branch information
mickel8 committed Aug 5, 2024
1 parent 91c3824 commit 5fec3c1
Show file tree
Hide file tree
Showing 17 changed files with 208 additions and 246 deletions.
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ XAV_READER_SO = $(PRIV_DIR)/libxavreader.so
# uncomment to compile with debug logs
XAV_DEBUG_LOGS = -DXAV_DEBUG=1

DECODER_HEADERS = $(XAV_DIR)/xav_decoder.h $(XAV_DIR)/decoder.h $(XAV_DIR)/converter.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h
DECODER_SOURCES = $(XAV_DIR)/xav_decoder.c $(XAV_DIR)/decoder.c $(XAV_DIR)/converter.c $(XAV_DIR)/utils.c
DECODER_HEADERS = $(XAV_DIR)/xav_decoder.h $(XAV_DIR)/decoder.h $(XAV_DIR)/video_converter.h $(XAV_DIR)/audio_converter.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h
DECODER_SOURCES = $(XAV_DIR)/xav_decoder.c $(XAV_DIR)/decoder.c $(XAV_DIR)/video_converter.c $(XAV_DIR)/audio_converter.c $(XAV_DIR)/utils.c

READER_HEADERS = $(XAV_DIR)/xav_reader.h $(XAV_DIR)/reader.h $(XAV_DIR)/converter.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h
READER_SOURCES = $(XAV_DIR)/xav_reader.c $(XAV_DIR)/reader.c $(XAV_DIR)/converter.c $(XAV_DIR)/utils.c
READER_HEADERS = $(XAV_DIR)/xav_reader.h $(XAV_DIR)/reader.h $(XAV_DIR)/video_converter.h $(XAV_DIR)/audio_converter.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h
READER_SOURCES = $(XAV_DIR)/xav_reader.c $(XAV_DIR)/reader.c $(XAV_DIR)/video_converter.c $(XAV_DIR)/audio_converter.c $(XAV_DIR)/utils.c

CFLAGS = $(XAV_DEBUG_LOGS) -fPIC -shared
IFLAGS = -I$(ERTS_INCLUDE_DIR) -I$(XAV_DIR)
Expand Down
7 changes: 4 additions & 3 deletions c_src/xav/converter.c → c_src/xav/audio_converter.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#include <libswresample/swresample.h>
#include <stdint.h>

#include "audio_converter.h"
#include "channel_layout.h"
#include "converter.h"
#include "utils.h"

struct Converter *converter_alloc() {
Expand Down Expand Up @@ -65,20 +65,21 @@ int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_da
return ret;
}

*out_data = out_data_tmp;

*out_samples = swr_convert(c->swr_ctx, out_data_tmp, max_out_nb_samples,
(const uint8_t **)src_frame->data, src_frame->nb_samples);

if (*out_samples < 0) {
XAV_LOG_DEBUG("Couldn't convert samples: %d", *out_samples);
av_freep(&out_data_tmp[0]);
return -1;
}

XAV_LOG_DEBUG("Converted %d samples per channel", *out_samples);

*out_size = *out_samples * out_bytes_per_sample * out_nb_channels;

*out_data = out_data_tmp;

return 0;
}

Expand Down
File renamed without changes.
86 changes: 2 additions & 84 deletions c_src/xav/decoder.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "decoder.h"
#include "utils.h"
#include "video_converter.h"

static int init_converter(struct Decoder *decoder);

Expand All @@ -8,16 +9,6 @@ struct Decoder *decoder_alloc() {

decoder->codec = NULL;
decoder->c = NULL;
decoder->out_format_name = NULL;

for (int i = 0; i < 4; i++) {
decoder->rgb_dst_data[i] = NULL;
}

decoder->frame_data = NULL;
decoder->frame_linesize = NULL;
decoder->converter = NULL;
decoder->out_data = NULL;

return decoder;
}
Expand All @@ -26,12 +17,9 @@ int decoder_init(struct Decoder *decoder, const char *codec) {
if (strcmp(codec, "opus") == 0) {
decoder->media_type = AVMEDIA_TYPE_AUDIO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_OPUS);
// we will initialize out_format_name with the first frame
decoder->out_format_name = NULL;
} else if (strcmp(codec, "vp8") == 0) {
decoder->media_type = AVMEDIA_TYPE_VIDEO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_VP8);
decoder->out_format_name = "rgb";
} else {
return -1;
}
Expand Down Expand Up @@ -69,57 +57,13 @@ int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame) {
return -2;
}

ret = avcodec_receive_frame(decoder->c, frame);
if (ret != 0) {
return -1;
}

if (decoder->media_type == AVMEDIA_TYPE_AUDIO && decoder->out_format_name == NULL) {
enum AVSampleFormat out_sample_fmt = av_get_alt_sample_fmt(frame->format, 0);
decoder->out_format_name = av_get_sample_fmt_name(out_sample_fmt);
}

if (decoder->media_type == AVMEDIA_TYPE_VIDEO) {
if (frame->format != AV_PIX_FMT_RGB24) {
convert_to_rgb(frame, decoder->rgb_dst_data, decoder->rgb_dst_linesize);
decoder->frame_data = decoder->rgb_dst_data;
decoder->frame_linesize = decoder->rgb_dst_linesize;
} else {
decoder->frame_data = frame->data;
decoder->frame_linesize = frame->linesize;
}
} else if (decoder->media_type == AVMEDIA_TYPE_AUDIO) {

if (decoder->converter == NULL) {
ret = init_converter(decoder);
if (ret < 0) {
return ret;
}
}

return converter_convert(decoder->converter, frame, &decoder->out_data, &decoder->out_samples,
&decoder->out_size);
}

return 0;
return avcodec_receive_frame(decoder->c, frame);
}

/*
 * Releases the per-frame resources held by the decoder.
 *
 * Unreferences decoder->frame and decoder->pkt, frees the first plane of the
 * RGB buffer when frame_data points at rgb_dst_data, and frees out_data when
 * it is set.
 */
void decoder_free_frame(struct Decoder *decoder) {
  // TODO revisit this
  av_frame_unref(decoder->frame);
  av_packet_unref(decoder->pkt);

  // The audio and the video case free the same buffer under the same
  // condition, so they are handled together.
  int is_audio_or_video = decoder->media_type == AVMEDIA_TYPE_AUDIO ||
                          decoder->media_type == AVMEDIA_TYPE_VIDEO;
  if (is_audio_or_video && decoder->frame_data == decoder->rgb_dst_data) {
    av_freep(&decoder->frame_data[0]);
  }

  if (decoder->out_data == NULL) {
    return;
  }

  // av_freep sets pointer to NULL
  av_freep(&decoder->out_data);
}

void decoder_free(struct Decoder **decoder) {
Expand All @@ -143,29 +87,3 @@ void decoder_free(struct Decoder **decoder) {
*decoder = NULL;
}
}

static int init_converter(struct Decoder *decoder) {
decoder->converter = converter_alloc();

if (decoder->converter == NULL) {
XAV_LOG_DEBUG("Couldn't allocate converter");
return -1;
}

int out_sample_rate = decoder->c->sample_rate;
enum AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLT;

struct ChannelLayout in_chlayout, out_chlayout;
#if LIBAVUTIL_VERSION_MAJOR >= 58
in_chlayout.layout = decoder->c->ch_layout;
out_chlayout.layout = decoder->c->ch_layout;
#else
in_chlayout.layout = decoder->c->channel_layout;
out_chlayout.layout = decoder->c->channel_layout;
XAV_LOG_DEBUG("in_chlayout %ld", in_chlayout.layout);
XAV_LOG_DEBUG("in nb_channels %d", av_get_channel_layout_nb_channels(in_chlayout.layout));
#endif

return converter_init(decoder->converter, in_chlayout, decoder->c->sample_rate,
decoder->c->sample_fmt, out_chlayout, out_sample_rate, out_sample_fmt);
}
20 changes: 1 addition & 19 deletions c_src/xav/decoder.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include <libavcodec/avcodec.h>
#include <libswresample/swresample.h>

#include "converter.h"
#include "audio_converter.h"
#include "utils.h"

struct Decoder {
Expand All @@ -10,24 +10,6 @@ struct Decoder {
AVPacket *pkt;
const AVCodec *codec;
AVCodecContext *c;

const char *out_format_name;

uint8_t *rgb_dst_data[4];
int rgb_dst_linesize[4];

uint8_t **frame_data;
int *frame_linesize;

struct Converter *converter;
// Buffer where audio samples are written after conversion.
// We always convert to packed format, so only out_data[0] is set.
uint8_t **out_data;
// Number of samples in out_data buffer
int out_samples;
// Size of out_data buffer.
// This is the same as out_samples * bytes_per_sample(out_format) * out_channels.
int out_size;
};

struct Decoder *decoder_alloc();
Expand Down
87 changes: 2 additions & 85 deletions c_src/xav/reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,6 @@ struct Reader *reader_alloc() {
reader->options = NULL;
reader->in_format_name = NULL;
reader->out_format_name = NULL;
reader->frame_data = NULL;
reader->frame_linesize = NULL;
for (int i = 0; i < 4; i++) {
reader->rgb_dst_data[i] = NULL;
}
reader->converter = NULL;
reader->out_data = NULL;

return reader;
}
Expand Down Expand Up @@ -105,7 +98,7 @@ int reader_next_frame(struct Reader *reader) {

if (ret == 0) {
XAV_LOG_DEBUG("Received frame");
goto fin;
return 0;
} else if (ret == AVERROR_EOF) {
XAV_LOG_DEBUG("EOF");
return ret;
Expand Down Expand Up @@ -188,47 +181,10 @@ int reader_next_frame(struct Reader *reader) {
}
}

fin:
if (reader->media_type == AVMEDIA_TYPE_VIDEO && reader->frame->format != AV_PIX_FMT_RGB24) {
XAV_LOG_DEBUG("Converting video to RGB");
convert_to_rgb(reader->frame, reader->rgb_dst_data, reader->rgb_dst_linesize);
reader->frame_data = reader->rgb_dst_data;
reader->frame_linesize = reader->rgb_dst_linesize;
} else if (reader->media_type == AVMEDIA_TYPE_VIDEO) {
reader->frame_data = reader->frame->data;
reader->frame_linesize = reader->frame->linesize;
} else if (reader->media_type == AVMEDIA_TYPE_AUDIO) {
XAV_LOG_DEBUG("Converting audio to desired out format");

if (reader->converter == NULL) {
XAV_LOG_DEBUG("Converter not initialized. Initializing.");
ret = init_converter(reader);
if (ret < 0) {
return ret;
}
}

return converter_convert(reader->converter, reader->frame, &reader->out_data,
&reader->out_samples, &reader->out_size);
}

return 0;
}

/*
 * Releases the per-frame resources held by the reader.
 *
 * Unreferences reader->frame, frees the first plane of the RGB buffer when
 * frame_data points at rgb_dst_data, and frees out_data when it is set.
 */
void reader_free_frame(struct Reader *reader) {
  av_frame_unref(reader->frame);

  // The audio and the video case free the same buffer under the same
  // condition, so they are handled together.
  int is_audio_or_video = reader->media_type == AVMEDIA_TYPE_AUDIO ||
                          reader->media_type == AVMEDIA_TYPE_VIDEO;
  if (is_audio_or_video && reader->frame_data == reader->rgb_dst_data) {
    av_freep(&reader->frame_data[0]);
  }

  if (reader->out_data == NULL) {
    return;
  }

  av_freep(&reader->out_data);
}
/* Unreferences the reader's current frame. */
void reader_free_frame(struct Reader *reader) {
  av_frame_unref(reader->frame);
}

void reader_free(struct Reader **reader) {
XAV_LOG_DEBUG("Freeing Reader object");
Expand Down Expand Up @@ -259,42 +215,3 @@ void reader_free(struct Reader **reader) {
*reader = NULL;
}
}

/*
 * Creates reader->converter and initializes it to convert decoded audio to
 * mono, 16000 Hz, AV_SAMPLE_FMT_FLT.
 *
 * The input channel layout is resolved and validated BEFORE the converter is
 * allocated. The caller only invokes this function while reader->converter is
 * NULL, so bailing out on an unusable layout must not leave an allocated but
 * uninitialized converter behind.
 *
 * Returns -1 when the input layout cannot be determined or the converter
 * cannot be allocated, otherwise the result of converter_init().
 *
 * NOTE(review): if converter_init() itself fails, reader->converter stays
 * set; confirm how callers are expected to recover from that.
 */
static int init_converter(struct Reader *reader) {
  int out_sample_rate = 16000;
  enum AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLT;

  struct ChannelLayout in_chlayout, out_chlayout;
#if LIBAVUTIL_VERSION_MAJOR >= 58
  in_chlayout.layout = reader->c->ch_layout;
  av_channel_layout_from_mask(&out_chlayout.layout, AV_CH_LAYOUT_MONO);
#else
  in_chlayout.layout = reader->frame->channel_layout;
  out_chlayout.layout = AV_CH_LAYOUT_MONO;

  if (reader->frame->channel_layout == 0 && reader->frame->channels > 0) {
    // In newer FFmpeg versions, 0 means that the order of channels is
    // unspecified but there still might be information about channels number.
    // Let's check against it and take default channel order for the given channels number.
    // This is also what newer FFmpeg versions do under the hood when passing
    // unspecified channel order.
    XAV_LOG_DEBUG("Channel layout unset. Setting to default for channels number: %d",
                  reader->frame->channels);
    in_chlayout.layout = av_get_default_channel_layout(reader->frame->channels);
  } else if (reader->frame->channel_layout == 0) {
    XAV_LOG_DEBUG("Both channel layout and channels are unset. Cannot init converter.");
    return -1;
  }

#endif

  // Allocate only once the layouts are known to be usable.
  reader->converter = converter_alloc();

  if (reader->converter == NULL) {
    XAV_LOG_DEBUG("Couldn't allocate converter");
    return -1;
  }

  return converter_init(reader->converter, in_chlayout, reader->c->sample_rate,
                        reader->c->sample_fmt, out_chlayout, out_sample_rate, out_sample_fmt);
}
24 changes: 1 addition & 23 deletions c_src/xav/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <stdlib.h>
#include <string.h>

#include "converter.h"
#include "audio_converter.h"
#include "utils.h"

struct Reader {
Expand All @@ -26,28 +26,6 @@ struct Reader {

const char *in_format_name;
const char *out_format_name;

// used for converting decoded frame
// to rgb pixel format
uint8_t *rgb_dst_data[4];
int rgb_dst_linesize[4];

// points either to frame->data /
// frame->linesize or to rgb_dst_data /
// rgb_dst_linesize, depending on
// whether conversion to rgb was needed
uint8_t **frame_data;
int *frame_linesize;

struct Converter *converter;
// Buffer where audio samples are written after conversion.
// We always convert to packed format, so only out_data[0] is set.
uint8_t **out_data;
// Number of samples in out_data buffer
int out_samples;
// Size of out_data buffer.
// This is the same as out_samples * bytes_per_sample(out_format) * out_channels.
int out_size;
};

struct Reader *reader_alloc();
Expand Down
4 changes: 2 additions & 2 deletions c_src/xav/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int
return enif_make_tuple(env, 4, data_term, format_term, samples_term, pts_term);
}

ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame, unsigned char *data[],
int *linesize, const char *format_name) {
ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame, uint8_t *data[4],
int linesize[4], const char *format_name) {
ERL_NIF_TERM data_term;
unsigned char *ptr = enif_make_new_binary(env, linesize[0] * frame->height, &data_term);
memcpy(ptr, data[0], linesize[0] * frame->height);
Expand Down
4 changes: 2 additions & 2 deletions c_src/xav/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ void convert_to_rgb(AVFrame *src_frame, uint8_t *dst_data[], int dst_linesize[])
ERL_NIF_TERM xav_nif_ok(ErlNifEnv *env, ERL_NIF_TERM data_term);
ERL_NIF_TERM xav_nif_error(ErlNifEnv *env, char *reason);
ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg);
ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame, unsigned char *data[],
int *linesize, const char *out_format_name);
ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame, uint8_t *out_data[4],
int out_linesize[4], const char *out_format);
ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples,
int out_size, const char *out_format, int pts);
Loading

0 comments on commit 5fec3c1

Please sign in to comment.