Skip to content

Commit

Permalink
Bring back support for older ffmpegs
Browse files Browse the repository at this point in the history
  • Loading branch information
mickel8 committed Aug 2, 2024
1 parent 486121e commit af02667
Show file tree
Hide file tree
Showing 12 changed files with 97 additions and 55 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ PRIV_DIR = $(MIX_APP_PATH)/priv
XAV_SO = $(PRIV_DIR)/libxav.so

# uncomment to compile with debug logs
# XAV_DEBUG_LOGS = -DXAV_DEBUG=1
XAV_DEBUG_LOGS = -DXAV_DEBUG=1

HEADERS = $(XAV_DIR)/reader.h $(XAV_DIR)/decoder.h $(XAV_DIR)/converter.h $(XAV_DIR)/utils.h
HEADERS = $(XAV_DIR)/reader.h $(XAV_DIR)/decoder.h $(XAV_DIR)/converter.h $(XAV_DIR)/channel_layout.h $(XAV_DIR)/utils.h
SOURCES = $(XAV_DIR)/xav_nif.c $(XAV_DIR)/reader.c $(XAV_DIR)/decoder.c $(XAV_DIR)/converter.c $(XAV_DIR)/utils.c

CFLAGS = $(XAV_DEBUG_LOGS) -fPIC -shared
Expand Down
12 changes: 12 additions & 0 deletions c_src/xav/channel_layout.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef CHANNEL_LAYOUT_H
#define CHANNEL_LAYOUT_H
/* Included explicitly so this header is self-contained: uint64_t comes from
 * <stdint.h> and LIBAVUTIL_VERSION_MAJOR from <libavutil/version.h>. Without
 * them the #if below depends on what the including file happened to pull in
 * first. */
#include <stdint.h>

#include <libavutil/channel_layout.h>
#include <libavutil/version.h>

/*
 * Version-independent wrapper around an FFmpeg channel layout.
 *
 * With libavutil major version >= 58 the layout is stored as an
 * AVChannelLayout (set via av_opt_set_chlayout). With older versions it is
 * stored as a uint64_t channel mask (set via av_opt_set_channel_layout and
 * queried with av_get_channel_layout_nb_channels).
 *
 * Code that touches the `layout` member must guard the access with the same
 * LIBAVUTIL_VERSION_MAJOR check, because its type differs between branches.
 */
struct ChannelLayout {
#if LIBAVUTIL_VERSION_MAJOR >= 58
  AVChannelLayout layout;
#else
  uint64_t layout;
#endif
};
#endif
27 changes: 20 additions & 7 deletions c_src/xav/converter.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,25 @@
#include <libswresample/swresample.h>
#include <stdint.h>

#include "channel_layout.h"
#include "utils.h"

int converter_init(struct Converter *c, AVChannelLayout in_chlayout, int in_sample_rate,
enum AVSampleFormat in_sample_fmt, AVChannelLayout out_chlayout,
int converter_init(struct Converter *c, struct ChannelLayout in_chlayout, int in_sample_rate,
enum AVSampleFormat in_sample_fmt, struct ChannelLayout out_chlayout,
int out_sample_rate, enum AVSampleFormat out_sample_fmt) {
c->swr_ctx = swr_alloc();
c->in_sample_rate = in_sample_rate;
c->out_sample_rate = out_sample_rate;
c->out_chlayout = out_chlayout;
c->out_sample_fmt = out_sample_fmt;

av_opt_set_chlayout(c->swr_ctx, "in_chlayout", &in_chlayout, 0);
av_opt_set_chlayout(c->swr_ctx, "out_chlayout", &out_chlayout, 0);
#if LIBAVUTIL_VERSION_MAJOR >= 58
av_opt_set_chlayout(c->swr_ctx, "in_chlayout", &in_chlayout.layout, 0);
av_opt_set_chlayout(c->swr_ctx, "out_chlayout", &out_chlayout.layout, 0);
#else
av_opt_set_channel_layout(c->swr_ctx, "in_channel_layout", in_chlayout.layout, 0);
av_opt_set_channel_layout(c->swr_ctx, "out_channel_layout", out_chlayout.layout, 0);
#endif

av_opt_set_int(c->swr_ctx, "in_sample_rate", in_sample_rate, 0);
av_opt_set_int(c->swr_ctx, "out_sample_rate", out_sample_rate, 0);
Expand All @@ -30,6 +36,13 @@ int converter_init(struct Converter *c, AVChannelLayout in_chlayout, int in_samp

int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_data,
int *out_samples, int *out_size) {

#if LIBAVUTIL_VERSION_MAJOR >= 58
int out_nb_channels = c->out_chlayout.layout.nb_channels;
#else
int out_nb_channels = av_get_channel_layout_nb_channels(c->out_chlayout.layout);
#endif

uint8_t **out_data_tmp = NULL;
int max_out_nb_samples = swr_get_out_samples(c->swr_ctx, src_frame->nb_samples);
int out_bytes_per_sample = av_get_bytes_per_sample(c->out_sample_fmt);
Expand All @@ -38,7 +51,7 @@ int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_da
// to use fast/aligned SIMD routines - this is what align option is used for.
// See https://stackoverflow.com/questions/35678041/what-is-linesize-alignment-meaning
// Because we return the binary straight to the Erlang, we can disable it.
int ret = av_samples_alloc_array_and_samples(&out_data_tmp, NULL, c->out_chlayout.nb_channels,
int ret = av_samples_alloc_array_and_samples(&out_data_tmp, NULL, out_nb_channels,
max_out_nb_samples, c->out_sample_fmt, 1);

if (ret < 0) {
Expand All @@ -58,9 +71,9 @@ int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_da

XAV_LOG_DEBUG("Converted %d samples per channel", *out_samples);

*out_size = *out_samples * out_bytes_per_sample * c->out_chlayout.nb_channels;
*out_size = *out_samples * out_bytes_per_sample * out_nb_channels;

return 0;
}

void converter_free(struct Converter *c) { swr_free(&c->swr_ctx); }
void converter_free(struct Converter *c) { swr_free(&c->swr_ctx); }
10 changes: 6 additions & 4 deletions c_src/xav/converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,20 @@
#include <libswresample/swresample.h>
#include <stdint.h>

#include "channel_layout.h"

struct Converter {
SwrContext *swr_ctx;
int64_t in_sample_rate;
int64_t out_sample_rate;
AVChannelLayout out_chlayout;
struct ChannelLayout out_chlayout;
enum AVSampleFormat out_sample_fmt;
};

int converter_init(struct Converter *c, AVChannelLayout in_chlayout, int in_sample_rate,
enum AVSampleFormat in_sample_fmt, AVChannelLayout out_chlaout,
int converter_init(struct Converter *c, struct ChannelLayout in_chlayout, int in_sample_rate,
enum AVSampleFormat in_sample_fmt, struct ChannelLayout out_chlaout,
int out_sample_rate, enum AVSampleFormat out_sample_fmt);
int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_data,
int *out_samples, int *out_size);
void converter_free(struct Converter *converter);
#endif
#endif
48 changes: 33 additions & 15 deletions c_src/xav/decoder.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#include "decoder.h"
#include "utils.h"

static int init_converter(struct Decoder *decoder);

int decoder_init(struct Decoder *decoder, const char *codec) {
decoder->swr_ctx = NULL;
decoder->converter = NULL;
decoder->out_data = NULL;

if (strcmp(codec, "opus") == 0) {
Expand Down Expand Up @@ -31,19 +34,6 @@ int decoder_init(struct Decoder *decoder, const char *codec) {
return -1;
}

if (decoder->media_type == AVMEDIA_TYPE_AUDIO) {
AVChannelLayout out_chlayout = decoder->c->ch_layout;
int out_sample_rate = decoder->c->sample_rate;
enum AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLT;

int ret = converter_init(&decoder->converter, decoder->c->ch_layout, decoder->c->sample_rate,
decoder->c->sample_fmt, out_chlayout, out_sample_rate, out_sample_fmt);

if (ret < 0) {
return ret;
}
}

return 0;
}

Expand Down Expand Up @@ -74,7 +64,15 @@ int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame) {
decoder->frame_linesize = frame->linesize;
}
} else if (decoder->media_type == AVMEDIA_TYPE_AUDIO) {
return converter_convert(&decoder->converter, frame, &decoder->out_data, &decoder->out_samples,

if (decoder->converter == NULL) {
ret = init_converter(decoder);
if (ret < 0) {
return ret;
}
}

return converter_convert(decoder->converter, frame, &decoder->out_data, &decoder->out_samples,
&decoder->out_size);
}

Expand All @@ -89,4 +87,24 @@ void decoder_free(struct Decoder *decoder) {
if (decoder->c != NULL) {
avcodec_free_context(&decoder->c);
}
}
}

/*
 * Allocates decoder->converter and initializes it from the decoder's codec
 * context.
 *
 * The converter keeps the input channel layout and sample rate and only
 * changes the sample format to AV_SAMPLE_FMT_FLT.
 *
 * Returns the result of converter_init() (negative on failure), or -1 when
 * the converter struct cannot be allocated. On any failure
 * decoder->converter is left NULL, so a later call can retry the
 * initialization instead of using a half-initialized converter.
 */
static int init_converter(struct Decoder *decoder) {
  decoder->converter = (struct Converter *)calloc(1, sizeof(struct Converter));
  if (decoder->converter == NULL) {
    return -1;
  }

  int out_sample_rate = decoder->c->sample_rate;
  enum AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLT;

  struct ChannelLayout in_chlayout, out_chlayout;
#if LIBAVUTIL_VERSION_MAJOR >= 58
  in_chlayout.layout = decoder->c->ch_layout;
  out_chlayout.layout = decoder->c->ch_layout;
#else
  in_chlayout.layout = decoder->c->channel_layout;
  out_chlayout.layout = decoder->c->channel_layout;
  // The layout is a uint64_t mask here; cast explicitly so the format
  // specifier matches on platforms where long is 32 bits wide.
  XAV_LOG_DEBUG("in_chlayout %llu", (unsigned long long)in_chlayout.layout);
  XAV_LOG_DEBUG("in nb_channels %d", av_get_channel_layout_nb_channels(in_chlayout.layout));
#endif

  int ret = converter_init(decoder->converter, in_chlayout, decoder->c->sample_rate,
                           decoder->c->sample_fmt, out_chlayout, out_sample_rate, out_sample_fmt);
  if (ret < 0) {
    // Release the resampler context (converter_free tolerates a NULL
    // swr_ctx) and the struct itself, then reset the pointer so that
    // decoder_decode() does not treat the converter as ready.
    converter_free(decoder->converter);
    free(decoder->converter);
    decoder->converter = NULL;
  }

  return ret;
}
2 changes: 1 addition & 1 deletion c_src/xav/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ struct Decoder {
uint8_t **frame_data;
int *frame_linesize;

struct Converter converter;
struct Converter *converter;
// Buffer where audio samples are written after conversion.
// We always convert to packed format, so only out_data[0] is set.
uint8_t **out_data;
Expand Down
20 changes: 18 additions & 2 deletions c_src/xav/reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <libavutil/samplefmt.h>
#include <libavutil/version.h>

#include <inttypes.h>

int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, int device_flag,
enum AVMediaType media_type) {
int ret;
Expand Down Expand Up @@ -71,11 +73,24 @@ int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, in
}

if (reader->media_type == AVMEDIA_TYPE_AUDIO) {
AVChannelLayout out_chlayout = AV_CHANNEL_LAYOUT_MONO;
int out_sample_rate = 16000;
enum AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLT;

int ret = converter_init(&reader->converter, reader->c->ch_layout, reader->c->sample_rate,
struct ChannelLayout in_chlayout, out_chlayout;
#if LIBAVUTIL_VERSION_MAJOR >= 58
XAV_LOG_DEBUG("in nb_channels %d", reader->c->ch_layout.nb_channels);
in_chlayout.layout = reader->c->ch_layout;
av_channel_layout_from_mask(&out_chlayout.layout, AV_CH_LAYOUT_MONO);
#else
in_chlayout.layout = reader->c->channel_layout;
out_chlayout.layout = AV_CH_LAYOUT_MONO;

XAV_LOG_DEBUG("in_chlayout %ld", in_chlayout.layout);
printf("uint64_t %" PRIu64 "\n", t);
XAV_LOG_DEBUG("in nb_channels %d", av_get_channel_layout_nb_channels(in_chlayout.layout));
#endif

int ret = converter_init(&reader->converter, in_chlayout, reader->c->sample_rate,
reader->c->sample_fmt, out_chlayout, out_sample_rate, out_sample_fmt);

if (ret < 0) {
Expand Down Expand Up @@ -211,6 +226,7 @@ void reader_free_frame(struct Reader *reader) {

if (reader->out_data != NULL) {
free(reader->out_data);
reader->out_data = NULL;
}
}

Expand Down
20 changes: 0 additions & 20 deletions c_src/xav/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,6 @@ void print_supported_pix_fmts(AVCodec *codec) {
}
}

/*
 * Allocates a resampler context into *swr_ctx and configures it from `frame`.
 *
 * Input and output use the frame's own channel layout and sample rate; only
 * the sample format changes, to the alternative format returned by
 * av_get_alt_sample_fmt(frame->format, 0).
 *
 * Returns the result of swr_init().
 * NOTE(review): the result of swr_alloc() is not checked before use.
 */
int init_swr_ctx_from_frame(SwrContext **swr_ctx, AVFrame *frame) {
*swr_ctx = swr_alloc();
// Second argument 0 requests the non-planar (packed) alternative format.
enum AVSampleFormat out_sample_fmt = av_get_alt_sample_fmt(frame->format, 0);

// libavutil >= 58 uses AVChannelLayout; older versions use a uint64_t mask.
#if LIBAVUTIL_VERSION_MAJOR >= 58
av_opt_set_chlayout(*swr_ctx, "in_chlayout", &frame->ch_layout, 0);
av_opt_set_chlayout(*swr_ctx, "out_chlayout", &frame->ch_layout, 0);
#else
av_opt_set_channel_layout(*swr_ctx, "in_channel_layout", frame->channel_layout, 0);
av_opt_set_channel_layout(*swr_ctx, "out_channel_layout", frame->channel_layout, 0);
#endif

// Same sample rate on both sides: no rate conversion is configured.
av_opt_set_int(*swr_ctx, "in_sample_rate", frame->sample_rate, 0);
av_opt_set_int(*swr_ctx, "out_sample_rate", frame->sample_rate, 0);
av_opt_set_sample_fmt(*swr_ctx, "in_sample_fmt", frame->format, 0);
av_opt_set_sample_fmt(*swr_ctx, "out_sample_fmt", out_sample_fmt, 0);

return swr_init(*swr_ctx);
}

void convert_to_rgb(AVFrame *src_frame, uint8_t *dst_data[], int dst_linesize[]) {
struct SwsContext *sws_ctx =
sws_getContext(src_frame->width, src_frame->height, src_frame->format, src_frame->width,
Expand Down
1 change: 0 additions & 1 deletion c_src/xav/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#define XAV_FREE(X) enif_free(X)

void print_supported_pix_fmts(AVCodec *codec);
int init_swr_ctx_from_frame(SwrContext **swr_ctx, AVFrame *frame);
void convert_to_rgb(AVFrame *src_frame, uint8_t *dst_data[], int dst_linesize[]);

ERL_NIF_TERM xav_nif_ok(ErlNifEnv *env, ERL_NIF_TERM data_term);
Expand Down
3 changes: 2 additions & 1 deletion c_src/xav/xav_nif.c
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ ERL_NIF_TERM decode(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
decoder->frame_linesize, "rgb");

} else if (decoder->media_type == AVMEDIA_TYPE_AUDIO) {
const char *out_format = av_get_sample_fmt_name(decoder->converter.out_sample_fmt);
const char *out_format = av_get_sample_fmt_name(decoder->converter->out_sample_fmt);

frame_term = xav_nif_audio_frame_to_term(env, decoder->out_data, decoder->out_samples,
decoder->out_size, out_format, frame->pts);
Expand All @@ -204,6 +204,7 @@ ERL_NIF_TERM decode(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {

if (decoder->out_data != NULL) {
free(decoder->out_data);
decoder->out_data = NULL;
}

return term;
Expand Down
2 changes: 1 addition & 1 deletion test/decoder_test.exs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
defmodule Xav.DecoderTest do
use ExUnit.Case, async: true
use ExUnit.Case, async: false

@vp8_keyframe <<80, 188, 0, 157, 1, 42, 128, 2, 224, 1, 57, 107, 0, 47, 28, 34, 22, 22, 34, 102,
18, 32, 212, 14, 239, 198, 191, 249, 103, 67, 12, 209, 59, 136, 119, 231, 148,
Expand Down
3 changes: 2 additions & 1 deletion test/reader_test.exs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
defmodule Xav.ReaderTest do
use ExUnit.Case, async: true
use ExUnit.Case, async: false

test "new/1" do
assert {:ok, %Xav.Reader{}} = Xav.Reader.new("./test/fixtures/sample_h264.mp4")
Expand Down Expand Up @@ -41,6 +41,7 @@ defmodule Xav.ReaderTest do
end
end)

@tag :debug
test "speech to text" do
for {path, expected_output} <- [
# This file has been downloaded from https://audio-samples.github.io/
Expand Down

0 comments on commit af02667

Please sign in to comment.