Skip to content

Commit

Permalink
Add scaling support to converter and decoder (#27)
Browse files Browse the repository at this point in the history
  • Loading branch information
gBillal authored Jan 19, 2025
1 parent b52115d commit 56253a4
Show file tree
Hide file tree
Showing 15 changed files with 334 additions and 164 deletions.
65 changes: 45 additions & 20 deletions c_src/xav/video_converter.c
Original file line number Diff line number Diff line change
@@ -1,41 +1,65 @@
#include "video_converter.h"
#include "utils.h"

static inline unsigned int video_converter_resolution_changed(struct VideoConverter *converter, AVFrame *frame) {
return converter->in_format != frame->format ||
converter->in_width != frame->width ||
converter->in_height != frame->height;
static inline unsigned int video_converter_resolution_changed(struct VideoConverter *converter,
AVFrame *frame) {
return converter->in_format != frame->format || converter->in_width != frame->width ||
converter->in_height != frame->height;
}

// Allocates a VideoConverter together with its destination frame.
//
// The scaling context starts out NULL; video_converter_init() creates it.
// Returns NULL if either the converter or its destination frame cannot be
// allocated, so a non-NULL result always carries a usable dst_frame.
struct VideoConverter *video_converter_alloc() {
  struct VideoConverter *converter =
      (struct VideoConverter *)XAV_ALLOC(sizeof(struct VideoConverter));
  if (converter == NULL) {
    return NULL;
  }

  converter->sws_ctx = NULL;
  converter->dst_frame = av_frame_alloc();
  if (converter->dst_frame == NULL) {
    // video_converter_init() dereferences dst_frame unconditionally, so a
    // half-built converter must not be handed back to the caller.
    XAV_FREE(converter);
    return NULL;
  }

  return converter;
}

int video_converter_init(struct VideoConverter *converter, int in_width, int in_height,
enum AVPixelFormat in_format, enum AVPixelFormat out_format) {
int video_converter_init(struct VideoConverter *converter, int in_width, int in_height,
enum AVPixelFormat in_format, int out_width, int out_height,
enum AVPixelFormat out_format) {
converter->in_width = in_width;
converter->in_height = in_height;
converter->in_format = in_format;
converter->out_format = out_format;

av_frame_unref(converter->dst_frame);
converter->out_width = out_width;
converter->out_height = out_height;
converter->out_format = out_format;

converter->dst_frame->width = in_width;
converter->dst_frame->height = in_height;
converter->dst_frame->format = out_format;
AVFrame *dst_frame = converter->dst_frame;
av_frame_unref(dst_frame);

dst_frame->format = out_format;

if (out_width == -1 && out_height == -1) {
dst_frame->width = in_width;
dst_frame->height = in_height;
} else if (out_width == -1) {
int width = in_width * out_height / in_height;
width = width + (width % 2);

dst_frame->width = width;
dst_frame->height = out_height;
} else if (out_height == -1) {
int height = in_height * out_width / in_width;
height = height + (height % 2);

dst_frame->width = out_width;
dst_frame->height = height;
} else {
dst_frame->width = out_width;
dst_frame->height = out_height;
}

int ret = av_frame_get_buffer(converter->dst_frame, 0);
int ret = av_frame_get_buffer(dst_frame, 0);
if (ret < 0)
return ret;

converter->sws_ctx = sws_getContext(in_width, in_height, in_format, in_width, in_height, out_format,
SWS_BILINEAR, NULL, NULL, NULL);
converter->sws_ctx =
sws_getContext(in_width, in_height, in_format, dst_frame->width, dst_frame->height,
dst_frame->format, SWS_BILINEAR, NULL, NULL, NULL);

if (!converter->sws_ctx) {
XAV_LOG_DEBUG("Couldn't get sws context");
Expand All @@ -51,8 +75,8 @@ int video_converter_convert(struct VideoConverter *converter, AVFrame *src_frame
if (video_converter_resolution_changed(converter, src_frame)) {
XAV_LOG_DEBUG("Frame resolution changed");
sws_freeContext(converter->sws_ctx);
ret = video_converter_init(converter, src_frame->width, src_frame->height,
src_frame->format, converter->out_format);
ret = video_converter_init(converter, src_frame->width, src_frame->height, src_frame->format,
converter->out_width, converter->out_height, converter->out_format);
if (ret < 0) {
return ret;
}
Expand All @@ -61,12 +85,13 @@ int video_converter_convert(struct VideoConverter *converter, AVFrame *src_frame
converter->dst_frame->pts = src_frame->pts;

// is this (const uint8_t * const*) cast really correct?
return sws_scale(converter->sws_ctx, (const uint8_t *const *)src_frame->data, src_frame->linesize, 0,
src_frame->height, converter->dst_frame->data, converter->dst_frame->linesize);
return sws_scale(converter->sws_ctx, (const uint8_t *const *)src_frame->data, src_frame->linesize,
0, src_frame->height, converter->dst_frame->data,
converter->dst_frame->linesize);
}

void video_converter_free(struct VideoConverter **converter) {
struct VideoConverter* vc = *converter;
struct VideoConverter *vc = *converter;
if (vc != NULL) {
if (vc->sws_ctx != NULL) {
sws_freeContext((*converter)->sws_ctx);
Expand Down
19 changes: 11 additions & 8 deletions c_src/xav/video_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,21 @@
#include <stdint.h>

// State for converting (and optionally scaling) decoded video frames.
struct VideoConverter {
  // libswscale context; NULL after video_converter_alloc() until
  // video_converter_init() creates it.
  struct SwsContext *sws_ctx;
  // Input parameters the context was built for. Each incoming frame is compared
  // against them so the converter can be re-initialised when they change.
  int in_width;
  int in_height;
  enum AVPixelFormat in_format;
  // Requested output size. -1 in one dimension means "derive it from the other,
  // keeping the input aspect ratio"; -1 in both means "keep the input size".
  int out_width;
  int out_height;
  enum AVPixelFormat out_format;
  // Destination frame that receives the converted picture.
  AVFrame *dst_frame;
};

// Allocates a converter and its destination frame.
// Returns NULL if the converter cannot be allocated.
struct VideoConverter *video_converter_alloc(void);

// (Re)configures `converter` for input frames of in_width x in_height in
// in_format, producing frames in out_format.
//
// out_width / out_height select the output size. Pass -1 for one of them to
// derive it from the other while keeping the input aspect ratio (the derived
// value is bumped to the next even number when odd), or -1 for both to keep
// the input size.
//
// Returns the negative av_frame_get_buffer() error if the destination buffer
// cannot be allocated.
int video_converter_init(struct VideoConverter *converter, int in_width, int in_height,
                         enum AVPixelFormat in_format, int out_width, int out_height,
                         enum AVPixelFormat out_format);

// Converts src_frame into converter->dst_frame and copies the source pts to it.
// If the frame's width, height or pixel format differs from the converter's
// current input parameters, the converter is re-initialised first.
//
// Returns the sws_scale() result, or the negative video_converter_init() error
// if re-initialisation fails.
int video_converter_convert(struct VideoConverter *converter, AVFrame *src_frame);

Expand Down
90 changes: 57 additions & 33 deletions c_src/xav/xav_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,17 @@ void free_frames(AVFrame **frames, int size) {
}
}

ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
if (argc != 4) {
ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
if (argc != 6) {
return xav_nif_raise(env, "invalid_arg_count");
}

// resolve codec
unsigned int codec_len;
if (!enif_get_atom_length(env, argv[0], &codec_len, ERL_NIF_LATIN1)) {
return xav_nif_raise(env, "failed_to_get_atom_length");
}
ERL_NIF_TERM ret;
char *codec = NULL;
char *out_format = NULL;

char *codec = (char *)XAV_ALLOC((codec_len + 1) * sizeof(char *));
if (enif_get_atom(env, argv[0], codec, codec_len + 1, ERL_NIF_LATIN1) == 0) {
// resolve codec
if (!xav_get_atom(env, argv[0], &codec)) {
return xav_nif_raise(env, "failed_to_get_atom");
}

Expand All @@ -39,74 +37,94 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
} else if (strcmp(codec, "h264") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_H264;
} else if (strcmp(codec, "h265") == 0) {
} else if (strcmp(codec, "h265") == 0 || strcmp(codec, "hevc") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_HEVC;
} else {
return xav_nif_raise(env, "failed_to_resolve_codec");
ret = xav_nif_raise(env, "failed_to_resolve_codec");
goto clean;
}

// resolve output format
unsigned int out_format_len;
if (!enif_get_atom_length(env, argv[1], &out_format_len, ERL_NIF_LATIN1)) {
return xav_nif_raise(env, "failed_to_get_atom_length");
}

char *out_format = (char *)XAV_ALLOC((out_format_len + 1) * sizeof(char *));
if (enif_get_atom(env, argv[1], out_format, out_format_len + 1, ERL_NIF_LATIN1) == 0) {
return xav_nif_raise(env, "failed_to_get_atom");
if (!xav_get_atom(env, argv[1], &out_format)) {
ret = xav_nif_raise(env, "failed_to_get_atom");
goto clean;
}

enum AVPixelFormat out_video_fmt = AV_PIX_FMT_NONE;
enum AVSampleFormat out_audo_fmt = AV_SAMPLE_FMT_NONE;
if (media_type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) {
out_video_fmt = av_get_pix_fmt(out_format);
if (out_video_fmt == AV_PIX_FMT_NONE) {
return xav_nif_raise(env, "unknown_out_format");
ret = xav_nif_raise(env, "unknown_out_format");
goto clean;
}
} else if (media_type == AVMEDIA_TYPE_AUDIO && strcmp(out_format, "nil") != 0) {
out_audo_fmt = av_get_sample_fmt(out_format);
if (out_audo_fmt == AV_SAMPLE_FMT_NONE) {
return xav_nif_raise(env, "unknown_out_format");
ret = xav_nif_raise(env, "unknown_out_format");
goto clean;
}
}

// resolve other params
int out_sample_rate;
if (!enif_get_int(env, argv[2], &out_sample_rate)) {
return xav_nif_raise(env, "invalid_out_sample_rate");
ret = xav_nif_raise(env, "invalid_out_sample_rate");
goto clean;
}

int out_channels;
if (!enif_get_int(env, argv[3], &out_channels)) {
return xav_nif_raise(env, "invalid_out_channels");
ret = xav_nif_raise(env, "invalid_out_channels");
goto clean;
}

int out_width;
if (!enif_get_int(env, argv[4], &out_width)) {
ret = xav_nif_raise(env, "failed_to_get_int");
goto clean;
}

int out_height;
if (!enif_get_int(env, argv[5], &out_height)) {
ret = xav_nif_raise(env, "failed_to_get_int");
goto clean;
}

struct XavDecoder *xav_decoder =
enif_alloc_resource(xav_decoder_resource_type, sizeof(struct XavDecoder));
xav_decoder->decoder = NULL;
xav_decoder->ac = NULL;
xav_decoder->vc = NULL;
xav_decoder->out_audio_fmt = out_audo_fmt;
xav_decoder->out_video_fmt = out_video_fmt;
xav_decoder->out_width = out_width;
xav_decoder->out_height = out_height;
xav_decoder->out_audio_fmt = out_audo_fmt;
xav_decoder->out_sample_rate = out_sample_rate;
xav_decoder->out_channels = out_channels;

xav_decoder->decoder = decoder_alloc();
if (xav_decoder->decoder == NULL) {
return xav_nif_raise(env, "failed_to_allocate_decoder");
ret = xav_nif_raise(env, "failed_to_allocate_decoder");
goto clean;
}

if (decoder_init(xav_decoder->decoder, media_type, codec_id) != 0) {
return xav_nif_raise(env, "failed_to_init_decoder");
ret = xav_nif_raise(env, "failed_to_init_decoder");
goto clean;
}

ERL_NIF_TERM decoder_term = enif_make_resource(env, xav_decoder);
ret = enif_make_resource(env, xav_decoder);
enif_release_resource(xav_decoder);

XAV_FREE(out_format);
clean:
if (codec != NULL)
XAV_FREE(codec);
if (out_format != NULL)
XAV_FREE(out_format);

return decoder_term;
return ret;
}

ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame *frame) {
Expand All @@ -116,7 +134,9 @@ ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame *fr
if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_VIDEO) {
XAV_LOG_DEBUG("Converting video to RGB");

if (xav_decoder->out_video_fmt == AV_PIX_FMT_NONE) {
// no pixel format conversion and no scaling
if (xav_decoder->out_video_fmt == AV_PIX_FMT_NONE && xav_decoder->out_width == -1 &&
xav_decoder->out_height == -1) {
return xav_nif_video_frame_to_term(env, frame);
}

Expand Down Expand Up @@ -299,8 +319,12 @@ static int init_video_converter(struct XavDecoder *xav_decoder, AVFrame *frame)
return -1;
}

return video_converter_init(xav_decoder->vc, frame->width, frame->height,
frame->format, xav_decoder->out_video_fmt);
enum AVPixelFormat out_format = xav_decoder->out_video_fmt;
if (out_format == AV_PIX_FMT_NONE)
out_format = frame->format;

return video_converter_init(xav_decoder->vc, frame->width, frame->height, frame->format,
xav_decoder->out_width, xav_decoder->out_height, out_format);
}

void free_xav_decoder(ErlNifEnv *env, void *obj) {
Expand All @@ -319,7 +343,7 @@ void free_xav_decoder(ErlNifEnv *env, void *obj) {
}
}

static ErlNifFunc xav_funcs[] = {{"new", 4, new},
static ErlNifFunc xav_funcs[] = {{"new", 6, new},
{"decode", 4, decode, ERL_NIF_DIRTY_JOB_CPU_BOUND},
{"flush", 1, flush, ERL_NIF_DIRTY_JOB_CPU_BOUND}};

Expand Down
8 changes: 6 additions & 2 deletions c_src/xav/xav_decoder.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
#include "audio_converter.h"
#include "video_converter.h"
#include "decoder.h"
#include "video_converter.h"

#include <libavutil/pixfmt.h>

struct XavDecoder {
struct Decoder *decoder;
struct AudioConverter *ac;
// Video params
struct VideoConverter *vc;
enum AVPixelFormat out_video_fmt;
int out_width;
int out_height;
// Audio params
struct AudioConverter *ac;
enum AVSampleFormat out_audio_fmt;
int out_sample_rate;
int out_channels;
Expand Down
6 changes: 3 additions & 3 deletions c_src/xav/xav_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ static int init_video_converter(struct XavReader *xav_reader, AVFrame *frame);

ErlNifResourceType *xav_reader_resource_type;

ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
if (argc != 6) {
return xav_nif_raise(env, "invalid_arg_count");
}
Expand Down Expand Up @@ -290,8 +290,8 @@ static int init_video_converter(struct XavReader *xav_reader, AVFrame *frame) {
return -1;
}

return video_converter_init(xav_reader->vc, frame->width, frame->height,
frame->format, AV_PIX_FMT_RGB24);
return video_converter_init(xav_reader->vc, frame->width, frame->height, frame->format,
frame->width, frame->height, AV_PIX_FMT_RGB24);
}

void free_xav_reader(ErlNifEnv *env, void *obj) {
Expand Down
2 changes: 1 addition & 1 deletion c_src/xav/xav_reader.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "audio_converter.h"
#include "video_converter.h"
#include "reader.h"
#include "video_converter.h"

struct XavReader {
struct Reader *reader;
Expand Down
Loading

0 comments on commit 56253a4

Please sign in to comment.