Skip to content

Commit

Permalink
working converter
Browse files Browse the repository at this point in the history
  • Loading branch information
mickel8 committed Aug 1, 2024
1 parent 407330e commit 63c055d
Show file tree
Hide file tree
Showing 9 changed files with 225 additions and 61 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ PRIV_DIR = $(MIX_APP_PATH)/priv
XAV_SO = $(PRIV_DIR)/libxav.so

# uncomment to compile with debug logs
# XAV_DEBUG_LOGS = -DXAV_DEBUG=1
XAV_DEBUG_LOGS = -DXAV_DEBUG=1

HEADERS = $(XAV_DIR)/reader.h $(XAV_DIR)/decoder.h $(XAV_DIR)/utils.h
SOURCES = $(XAV_DIR)/xav_nif.c $(XAV_DIR)/reader.c $(XAV_DIR)/decoder.c $(XAV_DIR)/utils.c
HEADERS = $(XAV_DIR)/reader.h $(XAV_DIR)/decoder.h $(XAV_DIR)/converter.h $(XAV_DIR)/utils.h
SOURCES = $(XAV_DIR)/xav_nif.c $(XAV_DIR)/reader.c $(XAV_DIR)/decoder.c $(XAV_DIR)/converter.c $(XAV_DIR)/utils.c

CFLAGS = $(XAV_DEBUG_LOGS) -fPIC -shared
IFLAGS = -I$(ERTS_INCLUDE_DIR) -I$(XAV_DIR)
Expand Down
93 changes: 93 additions & 0 deletions c_src/xav/converter.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#include "converter.h"
#include <libavutil/channel_layout.h>
#include <libavutil/error.h>
#include <libavutil/mem.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
#include <stdint.h>

#include "utils.h"

/**
 * Allocates and initializes the resampler held by a Converter.
 *
 * The output parameters are also cached in the struct so that
 * converter_convert() can size its output buffer without querying the
 * resampler again.
 *
 * @param c               converter to initialize; its swr_ctx is overwritten
 * @param in_chlayout     channel layout of the frames that will be converted
 * @param in_sample_rate  sample rate of the input frames
 * @param in_sample_fmt   sample format of the input frames
 * @param out_chlayout    desired output channel layout
 * @param out_sample_rate desired output sample rate
 * @param out_sample_fmt  desired output sample format
 * @return 0 on success, a negative AVERROR code on failure. On failure the
 *         resampler is released and c->swr_ctx is left NULL.
 */
int converter_init(struct Converter *c, AVChannelLayout in_chlayout, int in_sample_rate,
                   enum AVSampleFormat in_sample_fmt, AVChannelLayout out_chlayout,
                   int out_sample_rate, enum AVSampleFormat out_sample_fmt) {
  int ret;

  c->swr_ctx = swr_alloc();
  if (c->swr_ctx == NULL) {
    return AVERROR(ENOMEM);
  }

  c->in_sample_rate = in_sample_rate;
  c->out_sample_rate = out_sample_rate;
  // NOTE(review): plain struct copy; fine for native-order layouts, but a
  // custom-order layout would need av_channel_layout_copy() - confirm callers.
  c->out_chlayout = out_chlayout;
  c->out_sample_fmt = out_sample_fmt;

  ret = av_opt_set_chlayout(c->swr_ctx, "in_chlayout", &in_chlayout, 0);
  if (ret < 0) {
    goto fail;
  }

  ret = av_opt_set_chlayout(c->swr_ctx, "out_chlayout", &out_chlayout, 0);
  if (ret < 0) {
    goto fail;
  }

  ret = av_opt_set_int(c->swr_ctx, "in_sample_rate", in_sample_rate, 0);
  if (ret < 0) {
    goto fail;
  }

  ret = av_opt_set_int(c->swr_ctx, "out_sample_rate", out_sample_rate, 0);
  if (ret < 0) {
    goto fail;
  }

  ret = av_opt_set_sample_fmt(c->swr_ctx, "in_sample_fmt", in_sample_fmt, 0);
  if (ret < 0) {
    goto fail;
  }

  ret = av_opt_set_sample_fmt(c->swr_ctx, "out_sample_fmt", out_sample_fmt, 0);
  if (ret < 0) {
    goto fail;
  }

  ret = swr_init(c->swr_ctx);
  if (ret < 0) {
    goto fail;
  }

  return ret;

fail:
  // swr_free() also resets c->swr_ctx to NULL, so a later converter_free()
  // on this struct stays safe.
  swr_free(&c->swr_ctx);
  return ret;
}

/**
 * Converts one decoded audio frame into the converter's output format.
 *
 * A fresh output buffer is allocated on every call, sized for the upper
 * bound reported by swr_get_out_samples(), and filled by swr_convert().
 *
 * @param c         converter previously set up with converter_init()
 * @param src_frame frame whose samples are fed to the resampler
 * @param out_data  on success receives the newly allocated buffer array; the
 *                  caller owns it and releases it with
 *                  av_freep(&(*out_data)[0]) followed by av_freep(out_data).
 *                  Set to NULL on failure.
 * @param out_count on success receives the number of samples per channel
 *                  written to the buffer; set to 0 on failure
 * @return 0 on success, -1 on failure
 */
int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_data,
                      int *out_count) {
  uint8_t **out_data_tmp = NULL;
  int out_linesize;

  // Never hand a stale or half-initialized buffer back to the caller.
  *out_data = NULL;
  *out_count = 0;

  int max_out_nb_samples = swr_get_out_samples(c->swr_ctx, src_frame->nb_samples);
  XAV_LOG_DEBUG("max out nb samples %d", max_out_nb_samples);
  if (max_out_nb_samples < 0) {
    return -1;
  }

  int out_bytes_per_sample = av_get_bytes_per_sample(c->out_sample_fmt);
  XAV_LOG_DEBUG("out_chlayout.nb_channels %d", c->out_chlayout.nb_channels);
  XAV_LOG_DEBUG("out_bytes_per_sample %d", out_bytes_per_sample);
  XAV_LOG_DEBUG("out buffer size: %d",
                max_out_nb_samples * c->out_chlayout.nb_channels * out_bytes_per_sample);

  // align = 1: no padding is requested for the sample buffer.
  int ret =
      av_samples_alloc_array_and_samples(&out_data_tmp, &out_linesize, c->out_chlayout.nb_channels,
                                         max_out_nb_samples, c->out_sample_fmt, 1);
  XAV_LOG_DEBUG("ret: %d", ret);
  if (ret < 0) {
    return -1;
  }

  // extended_data (not data) is used so that planar input with more channels
  // than AV_NUM_DATA_POINTERS exposes every plane to the resampler.
  int converted =
      swr_convert(c->swr_ctx, out_data_tmp, max_out_nb_samples,
                  (const uint8_t **)src_frame->extended_data, src_frame->nb_samples);
  XAV_LOG_DEBUG("out_count: %d", converted);
  if (converted < 0) {
    // Free the sample buffer first, then the pointer array that refers to it.
    av_freep(&out_data_tmp[0]);
    av_freep(&out_data_tmp);
    return -1;
  }

  *out_data = out_data_tmp;
  *out_count = converted;
  return 0;
}

void converter_free(struct Converter *c) { swr_free(&c->swr_ctx); }
17 changes: 17 additions & 0 deletions c_src/xav/converter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef XAV_CONVERTER_H
#define XAV_CONVERTER_H

#include <libavutil/channel_layout.h>
#include <libavutil/frame.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
#include <stdint.h>

/**
 * Audio converter state: a libswresample context plus the conversion
 * parameters cached by converter_init().
 */
struct Converter {
  SwrContext *swr_ctx;                // resampling context, allocated by converter_init()
  int64_t in_sample_rate;             // sample rate of the input frames
  int64_t out_sample_rate;            // sample rate of the produced samples
  AVChannelLayout out_chlayout;       // channel layout of the produced samples
  enum AVSampleFormat out_sample_fmt; // sample format of the produced samples
};

/**
 * Allocates and initializes the resampler in @p c for the given input and
 * output parameters.
 *
 * @return 0 on success, a negative value on failure
 */
int converter_init(struct Converter *c, AVChannelLayout in_chlayout, int in_sample_rate,
                   enum AVSampleFormat in_sample_fmt, AVChannelLayout out_chlayout,
                   int out_sample_rate, enum AVSampleFormat out_sample_fmt);

/**
 * Converts @p src_frame to the output format configured in @p c.
 *
 * On success *out_data points to a buffer allocated with
 * av_samples_alloc_array_and_samples() and *out_count holds the number of
 * samples per channel written to it.
 *
 * @return 0 on success, a negative value on failure
 */
int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_data, int *out_count);

/**
 * Releases the resampling context held by @p converter.
 */
void converter_free(struct Converter *converter);

#endif // XAV_CONVERTER_H
60 changes: 23 additions & 37 deletions c_src/xav/reader.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "reader.h"
#include "utils.h"
#include <libavutil/samplefmt.h>
#include <libavutil/version.h>

int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, int device_flag,
Expand Down Expand Up @@ -69,23 +70,13 @@ int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, in
}

if (reader->media_type == AVMEDIA_TYPE_AUDIO) {
reader->swr_ctx = swr_alloc();
enum AVSampleFormat out_sample_fmt = av_get_alt_sample_fmt(reader->c->sample_fmt, 0);

#if LIBAVUTIL_VERSION_MAJOR >= 58
av_opt_set_chlayout(reader->swr_ctx, "in_chlayout", &reader->c->ch_layout, 0);
av_opt_set_chlayout(reader->swr_ctx, "out_chlayout", &reader->c->ch_layout, 0);
#else
av_opt_set_channel_layout(reader->swr_ctx, "in_channel_layout", reader->c->channel_layout, 0);
av_opt_set_channel_layout(reader->swr_ctx, "out_channel_layout", reader->c->channel_layout, 0);
#endif

av_opt_set_int(reader->swr_ctx, "in_sample_rate", reader->c->sample_rate, 0);
av_opt_set_int(reader->swr_ctx, "out_sample_rate", reader->c->sample_rate, 0);
av_opt_set_sample_fmt(reader->swr_ctx, "in_sample_fmt", reader->c->sample_fmt, 0);
av_opt_set_sample_fmt(reader->swr_ctx, "out_sample_fmt", out_sample_fmt, 0);

ret = swr_init(reader->swr_ctx);
AVChannelLayout out_chlayout = AV_CHANNEL_LAYOUT_MONO;
int out_sample_rate = 16000;
enum AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLT;

int ret = converter_init(&reader->converter, reader->c->ch_layout, reader->c->sample_rate,
reader->c->sample_fmt, out_chlayout, out_sample_rate, out_sample_fmt);

if (ret < 0) {
return ret;
}
Expand Down Expand Up @@ -156,6 +147,10 @@ int reader_next_frame(struct Reader *reader) {

if (ret == 0) {
XAV_LOG_DEBUG("Received frame");
XAV_LOG_DEBUG("linesize after read frame %d", reader->frame->linesize[0]);
XAV_LOG_DEBUG("channels after read frame %d", reader->frame->ch_layout.nb_channels);
XAV_LOG_DEBUG("samples after read frame %d", reader->frame->nb_samples);
XAV_LOG_DEBUG("bytes per sample %d", av_get_bytes_per_sample(reader->frame->format));
frame_ready = 1;
} else if (ret == AVERROR_EOF) {
XAV_LOG_DEBUG("EOF");
Expand Down Expand Up @@ -200,19 +195,10 @@ int reader_next_frame(struct Reader *reader) {
} else if (reader->media_type == AVMEDIA_TYPE_VIDEO) {
reader->frame_data = reader->frame->data;
reader->frame_linesize = reader->frame->linesize;
} else if (reader->media_type == AVMEDIA_TYPE_AUDIO &&
av_sample_fmt_is_planar(reader->frame->format) == 1) {
XAV_LOG_DEBUG("Converting to interleaved");

if (convert_to_interleaved(reader->swr_ctx, reader->frame, reader->rgb_dst_data,
reader->rgb_dst_linesize) != 0) {
return -1;
}

reader->frame_data = reader->rgb_dst_data;
reader->frame_linesize = reader->rgb_dst_linesize;
} else {
reader->frame_data = reader->frame->extended_data;
} else if (reader->media_type == AVMEDIA_TYPE_AUDIO) {
XAV_LOG_DEBUG("Converting to out format");
return converter_convert(&reader->converter, reader->frame, &reader->out_data,
&reader->out_count);
}

return 0;
Expand All @@ -230,12 +216,12 @@ void reader_free_frame(struct Reader *reader) {

void reader_free(struct Reader *reader) {
XAV_LOG_DEBUG("Freeing Reader object");
if (reader->swr_ctx != NULL) {
swr_free(&reader->swr_ctx);
}
avcodec_free_context(&reader->c);
av_packet_free(&reader->pkt);
av_frame_free(&reader->frame);
avformat_close_input(&reader->fmt_ctx);
// if (reader->swr_ctx != NULL) {
// swr_free(&reader->swr_ctx);
// }
// avcodec_free_context(&reader->c);
// av_packet_free(&reader->pkt);
// av_frame_free(&reader->frame);
// avformat_close_input(&reader->fmt_ctx);
XAV_FREE(reader->path);
}
5 changes: 5 additions & 0 deletions c_src/xav/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <string.h>
#include <time.h>

#include "converter.h"
#include "utils.h"

struct Reader {
Expand Down Expand Up @@ -39,6 +40,10 @@ struct Reader {
// whether conversion to rgb was needed
uint8_t **frame_data;
int *frame_linesize;

struct Converter converter;
uint8_t **out_data;
int out_count;
};

int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, int device_flag,
Expand Down
27 changes: 21 additions & 6 deletions c_src/xav/utils.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#include "utils.h"
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#include <stdint.h>

void print_supported_pix_fmts(AVCodec *codec) {
if (codec->pix_fmts == NULL) {
Expand Down Expand Up @@ -45,21 +48,33 @@ void convert_to_rgb(AVFrame *src_frame, uint8_t *dst_data[], int dst_linesize[])
int convert_to_interleaved(SwrContext *swr_ctx, AVFrame *src_frame, uint8_t **dst_data,
int *dst_linesize) {
#if LIBAVUTIL_VERSION_MAJOR >= 58
int channels = src_frame->ch_layout.nb_channels;
AVChannelLayout out_chlayout;
av_opt_get_chlayout(swr_ctx, "out_chlayout", 0, &out_chlayout);
int dst_channels = out_chlayout.nb_channels;
#else
int channels = src_frame->channels;
int channels = swr_ctx->out_channel_layout.nb_channels;
#endif

int samples_per_channel = src_frame->nb_samples;
enum AVSampleFormat out_sample_fmt;
av_opt_get_sample_fmt(swr_ctx, "out_sample_fmt", 0, &out_sample_fmt);

int64_t out_sample_rate;
av_opt_get_int(swr_ctx, "out_sample_rate", 0, &out_sample_rate);

int64_t in_sample_rate;
av_opt_get_int(swr_ctx, "in_sample_rate", 0, &in_sample_rate);

int dst_nb_samples =
av_rescale_rnd(src_frame->nb_samples, out_sample_rate, in_sample_rate, AV_ROUND_UP);

int ret =
av_samples_alloc(dst_data, dst_linesize, channels, samples_per_channel, src_frame->format, 0);
av_samples_alloc(dst_data, dst_linesize, dst_channels, dst_nb_samples, out_sample_fmt, 0);
if (ret < 0) {
return ret;
}

ret = swr_convert(swr_ctx, dst_data, samples_per_channel, (const uint8_t **)src_frame->data,
samples_per_channel);
ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_frame->data,
src_frame->nb_samples);
if (ret < 0) {
return ret;
}
Expand Down
23 changes: 20 additions & 3 deletions c_src/xav/xav_nif.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,28 @@ ERL_NIF_TERM next_frame(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
frame_term = xav_nif_video_frame_to_term(env, reader->frame, reader->frame_data,
reader->frame_linesize, reader->out_format_name);
} else if (reader->media_type == AVMEDIA_TYPE_AUDIO) {
frame_term = xav_nif_audio_frame_to_term(env, reader->frame, reader->frame_data,
reader->out_format_name);
// frame_term = xav_nif_audio_frame_to_term(env, reader->frame, reader->frame_data,
// reader->out_format_name);

ERL_NIF_TERM data_term;

size_t out_size = reader->out_count *
av_get_bytes_per_sample(reader->converter.out_sample_fmt) *
reader->converter.out_chlayout.nb_channels;

unsigned char *ptr = enif_make_new_binary(env, out_size, &data_term);
memcpy(ptr, reader->out_data[0], out_size);

const char *out_format_name = av_get_sample_fmt_name(reader->converter.out_sample_fmt);

ERL_NIF_TERM samples_term = enif_make_int(env, reader->out_count);
ERL_NIF_TERM format_term = enif_make_atom(env, out_format_name);
ERL_NIF_TERM pts_term = enif_make_int(env, reader->frame->pts);

frame_term = enif_make_tuple(env, 4, data_term, format_term, samples_term, pts_term);
}

reader_free_frame(reader);
// reader_free_frame(reader);

return xav_nif_ok(env, frame_term);
}
Expand Down
2 changes: 1 addition & 1 deletion lib/frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ defmodule Xav.Frame do
end

def to_nx(%__MODULE__{type: :audio} = frame) do
Nx.from_binary(frame.data, to_nx_format(frame.format))
Nx.from_binary(frame.data, to_nx_format(frame.format), backend: Nx.BinaryBackend)
end

defp to_nx_format(:u8), do: :u8
Expand Down
Loading

0 comments on commit 63c055d

Please sign in to comment.