Skip to content

Commit

Permalink
Add audio and video converters. Fix audio resampling. (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
mickel8 authored Aug 5, 2024
1 parent 407330e commit dad836a
Show file tree
Hide file tree
Showing 27 changed files with 795 additions and 496 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ jobs:
name: test-macos-x86-64 / macos-13 / OTP latest / Elixir latest
env:
MIX_ENV: test
# MacOS runners seem to have static IP addresses
# which results in GitHub rate limiting our requests
# for downloading prebuilt XLA binaries.
# Adding token seems to help.
XLA_HTTP_HEADERS: "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}"
steps:
- uses: actions/checkout@v2
- run: brew install ffmpeg elixir
Expand All @@ -88,6 +93,7 @@ jobs:
name: test-macos-arm / macos-14 / OTP latest / Elixir latest
env:
MIX_ENV: test
XLA_HTTP_HEADERS: "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}"
steps:
- uses: actions/checkout@v2
- run: brew install ffmpeg elixir
Expand Down
20 changes: 15 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@

XAV_DIR = c_src/xav
PRIV_DIR = $(MIX_APP_PATH)/priv
XAV_SO = $(PRIV_DIR)/libxav.so
XAV_DECODER_SO = $(PRIV_DIR)/libxavdecoder.so
XAV_READER_SO = $(PRIV_DIR)/libxavreader.so

# uncomment to compile with debug logs
# XAV_DEBUG_LOGS = -DXAV_DEBUG=1

HEADERS = $(XAV_DIR)/reader.h $(XAV_DIR)/decoder.h $(XAV_DIR)/utils.h
SOURCES = $(XAV_DIR)/xav_nif.c $(XAV_DIR)/reader.c $(XAV_DIR)/decoder.c $(XAV_DIR)/utils.c
DECODER_HEADERS = $(XAV_DIR)/xav_decoder.h $(XAV_DIR)/decoder.h $(XAV_DIR)/video_converter.h $(XAV_DIR)/audio_converter.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h
DECODER_SOURCES = $(XAV_DIR)/xav_decoder.c $(XAV_DIR)/decoder.c $(XAV_DIR)/video_converter.c $(XAV_DIR)/audio_converter.c $(XAV_DIR)/utils.c

READER_HEADERS = $(XAV_DIR)/xav_reader.h $(XAV_DIR)/reader.h $(XAV_DIR)/video_converter.h $(XAV_DIR)/audio_converter.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h
READER_SOURCES = $(XAV_DIR)/xav_reader.c $(XAV_DIR)/reader.c $(XAV_DIR)/video_converter.c $(XAV_DIR)/audio_converter.c $(XAV_DIR)/utils.c

CFLAGS = $(XAV_DEBUG_LOGS) -fPIC -shared
IFLAGS = -I$(ERTS_INCLUDE_DIR) -I$(XAV_DIR)
Expand All @@ -27,9 +31,15 @@ ifeq ($(shell uname -s),Darwin)
endif
endif

$(XAV_SO): Makefile $(SOURCES) $(HEADERS)
all: $(XAV_DECODER_SO) $(XAV_READER_SO)

$(XAV_DECODER_SO): Makefile $(DECODER_SOURCES) $(DECODER_HEADERS)
mkdir -p $(PRIV_DIR)
$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) $(DECODER_SOURCES) -o $(XAV_DECODER_SO) $(LDFLAGS)

$(XAV_READER_SO): Makefile $(READER_SOURCES) $(READER_HEADERS)
mkdir -p $(PRIV_DIR)
$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) $(SOURCES) -o $(XAV_SO) $(LDFLAGS)
$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) $(READER_SOURCES) -o $(XAV_READER_SO) $(LDFLAGS)

format:
clang-format -i $(XAV_DIR)/*
Expand Down
99 changes: 99 additions & 0 deletions c_src/xav/audio_converter.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
#include <stdint.h>

#include "audio_converter.h"
#include "channel_layout.h"
#include "utils.h"

/**
 * Allocates a new AudioConverter that has no resampler context yet.
 *
 * @return the new converter, or NULL when the allocation fails.
 *         Release it with audio_converter_free().
 */
struct AudioConverter *audio_converter_alloc(void) {
  struct AudioConverter *converter =
      (struct AudioConverter *)XAV_ALLOC(sizeof(struct AudioConverter));

  // Do not touch the fields of a failed allocation.
  if (converter == NULL) {
    return NULL;
  }

  // audio_converter_free() relies on swr_ctx being NULL until
  // audio_converter_init() creates the context.
  converter->swr_ctx = NULL;
  return converter;
}

/**
 * Creates and configures the resampler context of an audio converter.
 *
 * The output parameters are also stored in the converter because
 * audio_converter_convert() needs them to size its output buffer.
 *
 * @param c converter obtained from audio_converter_alloc()
 * @param in_chlayout channel layout of the frames that will be converted
 * @param in_sample_rate sample rate of the input
 * @param in_sample_fmt sample format of the input
 * @param out_chlayout desired channel layout
 * @param out_sample_rate desired sample rate
 * @param out_sample_fmt desired sample format
 * @return 0 on success and negative value on error. On error the partially
 *         configured context stays attached to the converter and is released
 *         by audio_converter_free().
 */
int audio_converter_init(struct AudioConverter *c, struct ChannelLayout in_chlayout,
                         int in_sample_rate, enum AVSampleFormat in_sample_fmt,
                         struct ChannelLayout out_chlayout, int out_sample_rate,
                         enum AVSampleFormat out_sample_fmt) {
  int ret;

  c->swr_ctx = swr_alloc();
  if (c->swr_ctx == NULL) {
    // Every av_opt_set_* call below would otherwise operate on a NULL object.
    return -1;
  }

  c->in_sample_rate = in_sample_rate;
  c->out_sample_rate = out_sample_rate;
  c->out_chlayout = out_chlayout;
  c->out_sample_fmt = out_sample_fmt;

  // A rejected option must not be ignored: the resampler would otherwise be
  // initialized with a default value instead of the requested one.
#if LIBAVUTIL_VERSION_MAJOR >= 58
  ret = av_opt_set_chlayout(c->swr_ctx, "in_chlayout", &in_chlayout.layout, 0);
  if (ret < 0) {
    return ret;
  }

  ret = av_opt_set_chlayout(c->swr_ctx, "out_chlayout", &out_chlayout.layout, 0);
  if (ret < 0) {
    return ret;
  }
#else
  ret = av_opt_set_channel_layout(c->swr_ctx, "in_channel_layout", in_chlayout.layout, 0);
  if (ret < 0) {
    return ret;
  }

  ret = av_opt_set_channel_layout(c->swr_ctx, "out_channel_layout", out_chlayout.layout, 0);
  if (ret < 0) {
    return ret;
  }
#endif

  ret = av_opt_set_int(c->swr_ctx, "in_sample_rate", in_sample_rate, 0);
  if (ret < 0) {
    return ret;
  }

  ret = av_opt_set_int(c->swr_ctx, "out_sample_rate", out_sample_rate, 0);
  if (ret < 0) {
    return ret;
  }

  ret = av_opt_set_sample_fmt(c->swr_ctx, "in_sample_fmt", in_sample_fmt, 0);
  if (ret < 0) {
    return ret;
  }

  ret = av_opt_set_sample_fmt(c->swr_ctx, "out_sample_fmt", out_sample_fmt, 0);
  if (ret < 0) {
    return ret;
  }

  return swr_init(c->swr_ctx);
}

/**
 * Converts a decoded frame to the output format configured in
 * audio_converter_init().
 *
 * On success the caller owns the result and must release both the sample
 * buffer (av_freep(&(*out_data)[0])) and the pointer array (av_freep(out_data)).
 * On failure nothing is handed over and *out_data is left untouched.
 *
 * @param c initialized audio converter
 * @param src_frame decoded source frame
 * @param out_data receives the newly allocated output buffer array
 * @param out_samples receives the number of converted samples per channel
 * @param out_size receives the size of the converted audio in bytes
 * @return 0 on success and negative value on error.
 */
int audio_converter_convert(struct AudioConverter *c, AVFrame *src_frame, uint8_t ***out_data,
                            int *out_samples, int *out_size) {

#if LIBAVUTIL_VERSION_MAJOR >= 58
  int out_nb_channels = c->out_chlayout.layout.nb_channels;
#else
  int out_nb_channels = av_get_channel_layout_nb_channels(c->out_chlayout.layout);
#endif

  uint8_t **out_data_tmp = NULL;
  int out_bytes_per_sample = av_get_bytes_per_sample(c->out_sample_fmt);

  int max_out_nb_samples = swr_get_out_samples(c->swr_ctx, src_frame->nb_samples);
  if (max_out_nb_samples < 0) {
    // Report the resampler's own error instead of the allocator's complaint
    // about a negative sample count.
    XAV_LOG_DEBUG("Couldn't get upper bound of out samples: %d", max_out_nb_samples);
    return max_out_nb_samples;
  }

  // Some parts of ffmpeg require buffers to be divisible by 32
  // to use fast/aligned SIMD routines - this is what align option is used for.
  // See https://stackoverflow.com/questions/35678041/what-is-linesize-alignment-meaning
  // Because we return the binary straight to the Erlang, we can disable it.
  int ret = av_samples_alloc_array_and_samples(&out_data_tmp, NULL, out_nb_channels,
                                               max_out_nb_samples, c->out_sample_fmt, 1);

  if (ret < 0) {
    XAV_LOG_DEBUG("Couldn't allocate array for out samples.");
    return ret;
  }

  *out_samples = swr_convert(c->swr_ctx, out_data_tmp, max_out_nb_samples,
                             (const uint8_t **)src_frame->data, src_frame->nb_samples);

  if (*out_samples < 0) {
    XAV_LOG_DEBUG("Couldn't convert samples: %d", *out_samples);
    // Two allocations were made above: the sample buffer and the array of
    // plane pointers. Both have to be released, otherwise the array leaks.
    av_freep(&out_data_tmp[0]);
    av_freep(&out_data_tmp);
    return -1;
  }

  XAV_LOG_DEBUG("Converted %d samples per channel", *out_samples);

  *out_size = *out_samples * out_bytes_per_sample * out_nb_channels;

  *out_data = out_data_tmp;

  return 0;
}

/**
 * Releases a converter together with its resampler context and clears the
 * caller's pointer. Does nothing when *converter is already NULL.
 *
 * @param converter address of the converter pointer to release
 */
void audio_converter_free(struct AudioConverter **converter) {
  struct AudioConverter *target = *converter;

  // Already released (or never allocated): nothing to do.
  if (target == NULL) {
    return;
  }

  if (target->swr_ctx != NULL) {
    swr_free(&target->swr_ctx);
  }

  XAV_FREE(target);
  *converter = NULL;
}
41 changes: 41 additions & 0 deletions c_src/xav/audio_converter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#ifndef CONVERTER_H
#define CONVERTER_H
#include <libavutil/channel_layout.h>
#include <libswresample/swresample.h>
#include <stdint.h>

#include "channel_layout.h"

// State of one audio conversion pipeline built on libswresample.
struct AudioConverter {
// Resampler context: NULL after audio_converter_alloc(), created in
// audio_converter_init() and released in audio_converter_free().
SwrContext *swr_ctx;
// Input sample rate passed to audio_converter_init().
int64_t in_sample_rate;
// Output sample rate passed to audio_converter_init().
int64_t out_sample_rate;
// Output channel layout; audio_converter_convert() reads the channel count from it.
struct ChannelLayout out_chlayout;
// Output sample format; audio_converter_convert() uses it to size the output buffer.
enum AVSampleFormat out_sample_fmt;
};

struct AudioConverter *audio_converter_alloc(void);

int audio_converter_init(struct AudioConverter *c, struct ChannelLayout in_chlayout,
int in_sample_rate, enum AVSampleFormat in_sample_fmt,
struct ChannelLayout out_chlayout, int out_sample_rate,
enum AVSampleFormat out_sample_fmt);

/**
* Converts AVFrame to the output format.
*
* @param c audio converter
* @param src_frame decoded source frame
* @param out_data buffer where audio samples are written after conversion.
* We always convert to the packed format, so only (*out_data)[0] is set.
* It is allocated internally; free the samples with av_freep(&(*out_data)[0])
* and then the pointer array itself with av_freep(out_data).
* @param out_samples number of samples per channel in out_data buffer.
* @param out_size size of out_buffer in bytes.
* This is the same as *out_samples * bytes_per_sample(out_format) * out_channels
* @return 0 on success and negative value on error.
*/
int audio_converter_convert(struct AudioConverter *c, AVFrame *src_frame, uint8_t ***out_data,
int *out_samples, int *out_size);

void audio_converter_free(struct AudioConverter **converter);
#endif
12 changes: 12 additions & 0 deletions c_src/xav/channel_layout.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef CHANNEL_LAYOUT_H
#define CHANNEL_LAYOUT_H
#include <libavutil/channel_layout.h>

// Wrapper that hides which channel layout representation the installed
// libavutil provides, so that function signatures stay the same for both.
struct ChannelLayout {
#if LIBAVUTIL_VERSION_MAJOR >= 58
// Layout structure used when libavutil major version is 58 or newer.
AVChannelLayout layout;
#else
// 64-bit layout value used with older libavutil versions.
uint64_t layout;
#endif
};
#endif
89 changes: 43 additions & 46 deletions c_src/xav/decoder.c
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
#include "decoder.h"
#include "utils.h"
#include "video_converter.h"

int decoder_init(struct Decoder *decoder, const char *codec) {
decoder->swr_ctx = NULL;
static int init_converter(struct Decoder *decoder);

/**
 * Allocates a Decoder with every owned pointer cleared.
 *
 * @return the new decoder, or NULL when the allocation fails.
 *         Release it with decoder_free().
 */
struct Decoder *decoder_alloc(void) {
  struct Decoder *decoder = (struct Decoder *)XAV_ALLOC(sizeof(struct Decoder));

  // Do not touch the fields of a failed allocation.
  if (decoder == NULL) {
    return NULL;
  }

  decoder->codec = NULL;
  decoder->c = NULL;

  // decoder_free() compares frame and pkt against NULL before freeing them,
  // and decoder_init() can fail before assigning either one, so they must
  // not be left with indeterminate values.
  decoder->frame = NULL;
  decoder->pkt = NULL;

  return decoder;
}

int decoder_init(struct Decoder *decoder, const char *codec) {
if (strcmp(codec, "opus") == 0) {
decoder->media_type = AVMEDIA_TYPE_AUDIO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_OPUS);
// we will initialize out_format_name with the first frame
decoder->out_format_name = NULL;
} else if (strcmp(codec, "vp8") == 0) {
decoder->media_type = AVMEDIA_TYPE_VIDEO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_VP8);
decoder->out_format_name = "rgb";
} else {
return -1;
}
Expand All @@ -26,6 +33,16 @@ int decoder_init(struct Decoder *decoder, const char *codec) {
return -1;
}

decoder->frame = av_frame_alloc();
if (!decoder->frame) {
return -1;
}

decoder->pkt = av_packet_alloc();
if (!decoder->pkt) {
return -1;
}

if (avcodec_open2(decoder->c, decoder->codec, NULL) < 0) {
return -1;
}
Expand All @@ -40,53 +57,33 @@ int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame) {
return -2;
}

ret = avcodec_receive_frame(decoder->c, frame);
if (ret != 0) {
return -1;
}
return avcodec_receive_frame(decoder->c, frame);
}

if (decoder->media_type == AVMEDIA_TYPE_AUDIO && decoder->out_format_name == NULL) {
enum AVSampleFormat out_sample_fmt = av_get_alt_sample_fmt(frame->format, 0);
decoder->out_format_name = av_get_sample_fmt_name(out_sample_fmt);
}
void decoder_free_frame(struct Decoder *decoder) {
// TODO revisit this
av_frame_unref(decoder->frame);
av_packet_unref(decoder->pkt);
}

if (decoder->media_type == AVMEDIA_TYPE_VIDEO) {
if (frame->format != AV_PIX_FMT_RGB24) {
convert_to_rgb(frame, decoder->rgb_dst_data, decoder->rgb_dst_linesize);
decoder->frame_data = decoder->rgb_dst_data;
decoder->frame_linesize = decoder->rgb_dst_linesize;
} else {
decoder->frame_data = frame->data;
decoder->frame_linesize = frame->linesize;
void decoder_free(struct Decoder **decoder) {
XAV_LOG_DEBUG("Freeing Decoder object");
if (*decoder != NULL) {
struct Decoder *d = *decoder;

if (d->c != NULL) {
avcodec_free_context(&d->c);
}
} else if (decoder->media_type == AVMEDIA_TYPE_AUDIO &&
av_sample_fmt_is_planar(frame->format) == 1) {
if (decoder->swr_ctx == NULL) {
if (init_swr_ctx_from_frame(&decoder->swr_ctx, frame) != 0) {
return -1;
}

if (d->pkt != NULL) {
av_packet_free(&d->pkt);
}

if (convert_to_interleaved(decoder->swr_ctx, frame, decoder->rgb_dst_data,
decoder->rgb_dst_linesize) != 0) {
return -1;
if (d->frame != NULL) {
av_frame_free(&d->frame);
}

decoder->frame_data = decoder->rgb_dst_data;
decoder->frame_linesize = decoder->rgb_dst_linesize;
} else {
decoder->frame_data = frame->extended_data;
XAV_FREE(d);
*decoder = NULL;
}

return 0;
}

void decoder_free(struct Decoder *decoder) {
if (decoder->swr_ctx != NULL) {
swr_free(&decoder->swr_ctx);
}

if (decoder->c != NULL) {
avcodec_free_context(&decoder->c);
}
}
20 changes: 10 additions & 10 deletions c_src/xav/decoder.h
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
#include <libavcodec/avcodec.h>
#include <libswresample/swresample.h>

#include "audio_converter.h"
#include "utils.h"

struct Decoder {
enum AVMediaType media_type;
AVFrame *frame;
AVPacket *pkt;
const AVCodec *codec;
AVCodecContext *c;
SwrContext *swr_ctx;

const char *out_format_name;

uint8_t *rgb_dst_data[4];
int rgb_dst_linesize[4];

uint8_t **frame_data;
int *frame_linesize;
};

struct Decoder *decoder_alloc();

int decoder_init(struct Decoder *decoder, const char *codec);

int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame);

void decoder_free(struct Decoder *decoder);
void decoder_free_frame(struct Decoder *decoder);

void decoder_free(struct Decoder **decoder);
Loading

0 comments on commit dad836a

Please sign in to comment.