Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add scaling support to converter and decoder #27

Merged
merged 5 commits into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 45 additions & 20 deletions c_src/xav/video_converter.c
Original file line number Diff line number Diff line change
@@ -1,41 +1,65 @@
#include "video_converter.h"
#include "utils.h"

static inline unsigned int video_converter_resolution_changed(struct VideoConverter *converter, AVFrame *frame) {
return converter->in_format != frame->format ||
converter->in_width != frame->width ||
converter->in_height != frame->height;
// Tells whether `frame` no longer matches the input parameters stored in `converter`.
// Despite the name, the pixel format is compared in addition to width and height.
// Returns 1 if any of the three differs, 0 when all of them match.
static inline unsigned int video_converter_resolution_changed(struct VideoConverter *converter,
                                                              AVFrame *frame) {
  if (converter->in_format != frame->format) {
    return 1;
  }

  if (converter->in_width != frame->width) {
    return 1;
  }

  return converter->in_height != frame->height;
}

struct VideoConverter *video_converter_alloc() {
struct VideoConverter *converter =
(struct VideoConverter *)XAV_ALLOC(sizeof(struct VideoConverter));
if(converter) {
if (converter) {
converter->sws_ctx = NULL;
converter->dst_frame = av_frame_alloc();
}
return converter;
}

int video_converter_init(struct VideoConverter *converter, int in_width, int in_height,
enum AVPixelFormat in_format, enum AVPixelFormat out_format) {
int video_converter_init(struct VideoConverter *converter, int in_width, int in_height,
enum AVPixelFormat in_format, int out_width, int out_height,
enum AVPixelFormat out_format) {
converter->in_width = in_width;
converter->in_height = in_height;
converter->in_format = in_format;
converter->out_format = out_format;

av_frame_unref(converter->dst_frame);
converter->out_width = out_width;
converter->out_height = out_height;
converter->out_format = out_format;

converter->dst_frame->width = in_width;
converter->dst_frame->height = in_height;
converter->dst_frame->format = out_format;
AVFrame *dst_frame = converter->dst_frame;
av_frame_unref(dst_frame);

dst_frame->format = out_format;

if (out_width == -1 && out_height == -1) {
dst_frame->width = in_width;
dst_frame->height = in_height;
} else if (out_width == -1) {
int width = in_width * out_height / in_height;
width = width + (width % 2);

dst_frame->width = width;
dst_frame->height = out_height;
} else if (out_height == -1) {
int height = in_height * out_width / in_width;
height = height + (height % 2);

dst_frame->width = out_width;
dst_frame->height = height;
} else {
dst_frame->width = out_width;
dst_frame->height = out_height;
}

int ret = av_frame_get_buffer(converter->dst_frame, 0);
int ret = av_frame_get_buffer(dst_frame, 0);
if (ret < 0)
return ret;

converter->sws_ctx = sws_getContext(in_width, in_height, in_format, in_width, in_height, out_format,
SWS_BILINEAR, NULL, NULL, NULL);
converter->sws_ctx =
sws_getContext(in_width, in_height, in_format, dst_frame->width, dst_frame->height,
dst_frame->format, SWS_BILINEAR, NULL, NULL, NULL);

if (!converter->sws_ctx) {
XAV_LOG_DEBUG("Couldn't get sws context");
Expand All @@ -51,8 +75,8 @@ int video_converter_convert(struct VideoConverter *converter, AVFrame *src_frame
if (video_converter_resolution_changed(converter, src_frame)) {
XAV_LOG_DEBUG("Frame resolution changed");
sws_freeContext(converter->sws_ctx);
ret = video_converter_init(converter, src_frame->width, src_frame->height,
src_frame->format, converter->out_format);
ret = video_converter_init(converter, src_frame->width, src_frame->height, src_frame->format,
converter->out_width, converter->out_height, converter->out_format);
if (ret < 0) {
return ret;
}
Expand All @@ -61,12 +85,13 @@ int video_converter_convert(struct VideoConverter *converter, AVFrame *src_frame
converter->dst_frame->pts = src_frame->pts;

// is this (const uint8_t * const*) cast really correct?
return sws_scale(converter->sws_ctx, (const uint8_t *const *)src_frame->data, src_frame->linesize, 0,
src_frame->height, converter->dst_frame->data, converter->dst_frame->linesize);
return sws_scale(converter->sws_ctx, (const uint8_t *const *)src_frame->data, src_frame->linesize,
0, src_frame->height, converter->dst_frame->data,
converter->dst_frame->linesize);
}

void video_converter_free(struct VideoConverter **converter) {
struct VideoConverter* vc = *converter;
struct VideoConverter *vc = *converter;
if (vc != NULL) {
if (vc->sws_ctx != NULL) {
sws_freeContext((*converter)->sws_ctx);
Expand Down
19 changes: 11 additions & 8 deletions c_src/xav/video_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,21 @@
#include <stdint.h>

struct VideoConverter {
struct SwsContext *sws_ctx;
int in_width;
int in_height;
enum AVPixelFormat in_format;
enum AVPixelFormat out_format;
AVFrame *dst_frame;
struct SwsContext *sws_ctx;
int in_width;
int in_height;
enum AVPixelFormat in_format;
int out_width;
int out_height;
enum AVPixelFormat out_format;
AVFrame *dst_frame;
};

struct VideoConverter *video_converter_alloc();

int video_converter_init(struct VideoConverter* converter, int in_width, int in_height,
enum AVPixelFormat in_format, enum AVPixelFormat out_format);
// Configures `converter` to turn frames of size in_width x in_height in pixel format
// `in_format` into frames in `out_format`, optionally scaling them.
//
// The input and requested output parameters are stored on the converter, the destination
// frame is unreferenced and given a fresh buffer, and a new SwsContext (SWS_BILINEAR) is
// created for the resolved output size.
//
// Output size resolution:
//   - out_width == -1 and out_height == -1: the input dimensions are kept (no scaling).
//   - only out_width == -1: width is derived from out_height so the input aspect ratio is
//     preserved (integer division), then bumped by one if odd so it is even.
//   - only out_height == -1: height is derived from out_width the same way.
//   - otherwise: out_width and out_height are used as given.
//
// Returns the negative error code from av_frame_get_buffer() when the destination buffer
// cannot be allocated.
// NOTE(review): the return values for SwsContext creation failure and for success are not
// visible in this view — confirm against the implementation.
// NOTE(review): the visible implementation does not free a previously created sws_ctx;
// video_converter_convert() calls sws_freeContext() itself before re-initialising.
int video_converter_init(struct VideoConverter *converter, int in_width, int in_height,
enum AVPixelFormat in_format, int out_width, int out_height,
enum AVPixelFormat out_format);

int video_converter_convert(struct VideoConverter *converter, AVFrame *src_frame);

Expand Down
90 changes: 57 additions & 33 deletions c_src/xav/xav_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,17 @@ void free_frames(AVFrame **frames, int size) {
}
}

ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
if (argc != 4) {
ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
if (argc != 6) {
return xav_nif_raise(env, "invalid_arg_count");
}

// resolve codec
unsigned int codec_len;
if (!enif_get_atom_length(env, argv[0], &codec_len, ERL_NIF_LATIN1)) {
return xav_nif_raise(env, "failed_to_get_atom_length");
}
ERL_NIF_TERM ret;
char *codec = NULL;
char *out_format = NULL;

char *codec = (char *)XAV_ALLOC((codec_len + 1) * sizeof(char *));
if (enif_get_atom(env, argv[0], codec, codec_len + 1, ERL_NIF_LATIN1) == 0) {
// resolve codec
if (!xav_get_atom(env, argv[0], &codec)) {
return xav_nif_raise(env, "failed_to_get_atom");
}

Expand All @@ -39,74 +37,94 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
} else if (strcmp(codec, "h264") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_H264;
} else if (strcmp(codec, "h265") == 0) {
} else if (strcmp(codec, "h265") == 0 || strcmp(codec, "hevc") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_HEVC;
} else {
return xav_nif_raise(env, "failed_to_resolve_codec");
ret = xav_nif_raise(env, "failed_to_resolve_codec");
goto clean;
}

// resolve output format
unsigned int out_format_len;
if (!enif_get_atom_length(env, argv[1], &out_format_len, ERL_NIF_LATIN1)) {
return xav_nif_raise(env, "failed_to_get_atom_length");
}

char *out_format = (char *)XAV_ALLOC((out_format_len + 1) * sizeof(char *));
if (enif_get_atom(env, argv[1], out_format, out_format_len + 1, ERL_NIF_LATIN1) == 0) {
return xav_nif_raise(env, "failed_to_get_atom");
if (!xav_get_atom(env, argv[1], &out_format)) {
ret = xav_nif_raise(env, "failed_to_get_atom");
goto clean;
}

enum AVPixelFormat out_video_fmt = AV_PIX_FMT_NONE;
enum AVSampleFormat out_audo_fmt = AV_SAMPLE_FMT_NONE;
if (media_type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) {
out_video_fmt = av_get_pix_fmt(out_format);
if (out_video_fmt == AV_PIX_FMT_NONE) {
return xav_nif_raise(env, "unknown_out_format");
ret = xav_nif_raise(env, "unknown_out_format");
goto clean;
}
} else if (media_type == AVMEDIA_TYPE_AUDIO && strcmp(out_format, "nil") != 0) {
out_audo_fmt = av_get_sample_fmt(out_format);
if (out_audo_fmt == AV_SAMPLE_FMT_NONE) {
return xav_nif_raise(env, "unknown_out_format");
ret = xav_nif_raise(env, "unknown_out_format");
goto clean;
}
}

// resolve other params
int out_sample_rate;
if (!enif_get_int(env, argv[2], &out_sample_rate)) {
return xav_nif_raise(env, "invalid_out_sample_rate");
ret = xav_nif_raise(env, "invalid_out_sample_rate");
goto clean;
}

int out_channels;
if (!enif_get_int(env, argv[3], &out_channels)) {
return xav_nif_raise(env, "invalid_out_channels");
ret = xav_nif_raise(env, "invalid_out_channels");
goto clean;
}

int out_width;
if (!enif_get_int(env, argv[4], &out_width)) {
ret = xav_nif_raise(env, "failed_to_get_int");
goto clean;
}

int out_height;
if (!enif_get_int(env, argv[5], &out_height)) {
ret = xav_nif_raise(env, "failed_to_get_int");
goto clean;
}

struct XavDecoder *xav_decoder =
enif_alloc_resource(xav_decoder_resource_type, sizeof(struct XavDecoder));
xav_decoder->decoder = NULL;
xav_decoder->ac = NULL;
xav_decoder->vc = NULL;
xav_decoder->out_audio_fmt = out_audo_fmt;
xav_decoder->out_video_fmt = out_video_fmt;
xav_decoder->out_width = out_width;
xav_decoder->out_height = out_height;
xav_decoder->out_audio_fmt = out_audo_fmt;
xav_decoder->out_sample_rate = out_sample_rate;
xav_decoder->out_channels = out_channels;

xav_decoder->decoder = decoder_alloc();
if (xav_decoder->decoder == NULL) {
return xav_nif_raise(env, "failed_to_allocate_decoder");
ret = xav_nif_raise(env, "failed_to_allocate_decoder");
goto clean;
}

if (decoder_init(xav_decoder->decoder, media_type, codec_id) != 0) {
return xav_nif_raise(env, "failed_to_init_decoder");
ret = xav_nif_raise(env, "failed_to_init_decoder");
goto clean;
}

ERL_NIF_TERM decoder_term = enif_make_resource(env, xav_decoder);
ret = enif_make_resource(env, xav_decoder);
enif_release_resource(xav_decoder);

XAV_FREE(out_format);
clean:
if (codec != NULL)
XAV_FREE(codec);
if (out_format != NULL)
XAV_FREE(out_format);

return decoder_term;
return ret;
}

ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame *frame) {
Expand All @@ -116,7 +134,9 @@ ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame *fr
if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_VIDEO) {
XAV_LOG_DEBUG("Converting video to RGB");

if (xav_decoder->out_video_fmt == AV_PIX_FMT_NONE) {
// no pixel format conversion and no scaling
if (xav_decoder->out_video_fmt == AV_PIX_FMT_NONE && xav_decoder->out_width == -1 &&
xav_decoder->out_height == -1) {
return xav_nif_video_frame_to_term(env, frame);
}

Expand Down Expand Up @@ -299,8 +319,12 @@ static int init_video_converter(struct XavDecoder *xav_decoder, AVFrame *frame)
return -1;
}

return video_converter_init(xav_decoder->vc, frame->width, frame->height,
frame->format, xav_decoder->out_video_fmt);
enum AVPixelFormat out_format = xav_decoder->out_video_fmt;
if (out_format == AV_PIX_FMT_NONE)
out_format = frame->format;

return video_converter_init(xav_decoder->vc, frame->width, frame->height, frame->format,
xav_decoder->out_width, xav_decoder->out_height, out_format);
}

void free_xav_decoder(ErlNifEnv *env, void *obj) {
Expand All @@ -319,7 +343,7 @@ void free_xav_decoder(ErlNifEnv *env, void *obj) {
}
}

static ErlNifFunc xav_funcs[] = {{"new", 4, new},
// NIF export table; each entry is {Erlang function name, arity, C implementation[, flags]}.
// "decode" and "flush" carry ERL_NIF_DIRTY_JOB_CPU_BOUND; "new" is registered without flags.
static ErlNifFunc xav_funcs[] = {
    {"new", 6, new},
    {"decode", 4, decode, ERL_NIF_DIRTY_JOB_CPU_BOUND},
    {"flush", 1, flush, ERL_NIF_DIRTY_JOB_CPU_BOUND},
};

Expand Down
8 changes: 6 additions & 2 deletions c_src/xav/xav_decoder.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
#include "audio_converter.h"
#include "video_converter.h"
#include "decoder.h"
#include "video_converter.h"

#include <libavutil/pixfmt.h>

struct XavDecoder {
struct Decoder *decoder;
struct AudioConverter *ac;
// Video params
struct VideoConverter *vc;
enum AVPixelFormat out_video_fmt;
int out_width;
int out_height;
// Audio params
struct AudioConverter *ac;
enum AVSampleFormat out_audio_fmt;
int out_sample_rate;
int out_channels;
Expand Down
6 changes: 3 additions & 3 deletions c_src/xav/xav_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ static int init_video_converter(struct XavReader *xav_reader, AVFrame *frame);

ErlNifResourceType *xav_reader_resource_type;

ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
if (argc != 6) {
return xav_nif_raise(env, "invalid_arg_count");
}
Expand Down Expand Up @@ -290,8 +290,8 @@ static int init_video_converter(struct XavReader *xav_reader, AVFrame *frame) {
return -1;
}

return video_converter_init(xav_reader->vc, frame->width, frame->height,
frame->format, AV_PIX_FMT_RGB24);
return video_converter_init(xav_reader->vc, frame->width, frame->height, frame->format,
frame->width, frame->height, AV_PIX_FMT_RGB24);
}

void free_xav_reader(ErlNifEnv *env, void *obj) {
Expand Down
2 changes: 1 addition & 1 deletion c_src/xav/xav_reader.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "audio_converter.h"
#include "video_converter.h"
#include "reader.h"
#include "video_converter.h"

struct XavReader {
struct Reader *reader;
Expand Down
Loading