Skip to content

Commit

Permalink
Refactor out format resolving. Fix formatting.
Browse files Browse the repository at this point in the history
This commit moves output-format resolving into xav_decoder.c.
This way we can remove out_format from the Decoder struct.
  • Loading branch information
mickel8 committed Jan 3, 2025
1 parent 13fc3b0 commit a26f39a
Show file tree
Hide file tree
Showing 12 changed files with 91 additions and 87 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ end
Decode

```elixir
decoder = Xav.Decoder.new(:vp8)
decoder = Xav.Decoder.new(:vp8, out_format: :rgb24)
{:ok, %Xav.Frame{} = frame} = Xav.Decoder.decode(decoder, <<"somebinary">>)
```

Expand All @@ -52,7 +52,7 @@ Kino.Image.new(tensor)
Read from a camera:

```elixir
r = Xav.Reader.new!("/dev/video0", device?: true)
r = Xav.Reader.new!("/dev/video0", device?: true, out_format: :rgb24)
{:ok, %Xav.Frame{} = frame} = Xav.Reader.next_frame(r)
tensor = Xav.Frame.to_nx(frame)
Kino.Image.new(tensor)
Expand Down
29 changes: 4 additions & 25 deletions c_src/xav/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,39 +9,18 @@ struct Decoder *decoder_alloc() {

decoder->codec = NULL;
decoder->c = NULL;
decoder->out_format = AV_PIX_FMT_NONE;

return decoder;
}

int decoder_init(struct Decoder *decoder, const char *codec, const char* out_format) {
if (strcmp(codec, "opus") == 0) {
decoder->media_type = AVMEDIA_TYPE_AUDIO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_OPUS);
} else if (strcmp(codec, "vp8") == 0) {
decoder->media_type = AVMEDIA_TYPE_VIDEO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_VP8);
} else if (strcmp(codec, "h264") == 0) {
decoder->media_type = AVMEDIA_TYPE_VIDEO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_H264);
} else if (strcmp(codec, "h265") == 0) {
decoder->media_type = AVMEDIA_TYPE_VIDEO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_HEVC);
} else {
return -1;
}
int decoder_init(struct Decoder *decoder, enum AVMediaType media_type, enum AVCodecID codec_id) {
decoder->media_type = media_type;
decoder->codec = avcodec_find_decoder(codec_id);

if (!decoder->codec) {
return -1;
}

if(decoder->media_type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) {
decoder->out_format = av_get_pix_fmt(out_format);
if (decoder->out_format == AV_PIX_FMT_NONE) {
return -1;
}
}

decoder->c = avcodec_alloc_context3(decoder->codec);
if (!decoder->c) {
return -1;
Expand Down Expand Up @@ -74,7 +53,7 @@ int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame) {
return avcodec_receive_frame(decoder->c, frame);
}

int decoder_flush(struct Decoder *decoder, AVFrame **frames, int *frames_count) {
int decoder_flush(struct Decoder *decoder, AVFrame **frames, int *frames_count) {
int ret = avcodec_send_packet(decoder->c, NULL);
if (ret != 0) {
return ret;
Expand Down
3 changes: 1 addition & 2 deletions c_src/xav/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

struct Decoder {
enum AVMediaType media_type;
enum AVPixelFormat out_format;
AVFrame *frame;
AVPacket *pkt;
const AVCodec *codec;
Expand All @@ -17,7 +16,7 @@ struct Decoder {

struct Decoder *decoder_alloc();

int decoder_init(struct Decoder *decoder, const char *codec, const char* out_format);
int decoder_init(struct Decoder *decoder, enum AVMediaType media_type, enum AVCodecID codec_id);

int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame);

Expand Down
13 changes: 7 additions & 6 deletions c_src/xav/utils.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "utils.h"
#include <libavutil/mathematics.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#include <stdint.h>

Expand All @@ -21,14 +21,14 @@ ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg) {
}

ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples,
int out_size, const char *out_format, int pts) {
int out_size, enum AVSampleFormat out_format, int pts) {
ERL_NIF_TERM data_term;

unsigned char *ptr = enif_make_new_binary(env, out_size, &data_term);
memcpy(ptr, out_data[0], out_size);

ERL_NIF_TERM samples_term = enif_make_int(env, out_samples);
ERL_NIF_TERM format_term = enif_make_atom(env, out_format);
ERL_NIF_TERM format_term = enif_make_atom(env, av_get_sample_fmt_name(out_format));
ERL_NIF_TERM pts_term = enif_make_int(env, pts);

return enif_make_tuple(env, 4, data_term, format_term, samples_term, pts_term);
Expand All @@ -39,9 +39,10 @@ ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame) {

int payload_size = av_image_get_buffer_size(frame->format, frame->width, frame->height, 1);
unsigned char *ptr = enif_make_new_binary(env, payload_size, &data_term);

av_image_copy_to_buffer(ptr, payload_size, (const uint8_t *const *)frame->data,
(const int*)frame->linesize, frame->format, frame->width, frame->height, 1);

av_image_copy_to_buffer(ptr, payload_size, (const uint8_t *const *)frame->data,
(const int *)frame->linesize, frame->format, frame->width, frame->height,
1);

ERL_NIF_TERM format_term = enif_make_atom(env, av_get_pix_fmt_name(frame->format));
ERL_NIF_TERM height_term = enif_make_int(env, frame->height);
Expand Down
2 changes: 1 addition & 1 deletion c_src/xav/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ ERL_NIF_TERM xav_nif_error(ErlNifEnv *env, char *reason);
ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg);
ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame);
ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples,
int out_size, const char *out_format, int pts);
int out_size, enum AVSampleFormat out_format, int pts);
4 changes: 2 additions & 2 deletions c_src/xav/video_converter.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "video_converter.h"

int video_converter_convert(AVFrame *src_frame, AVFrame **dst_frame, enum AVPixelFormat out_format) {
int video_converter_convert(AVFrame *src_frame, AVFrame **dst_frame,
enum AVPixelFormat out_format) {
int ret;

*dst_frame = av_frame_alloc();
Expand All @@ -26,7 +27,6 @@ int video_converter_convert(AVFrame *src_frame, AVFrame **dst_frame, enum AVPixe
return ret;
}


// is this (const uint8_t * const*) cast really correct?
ret = sws_scale(sws_ctx, (const uint8_t *const *)src_frame->data, src_frame->linesize, 0,
src_frame->height, (*dst_frame)->data, (*dst_frame)->linesize);
Expand Down
90 changes: 53 additions & 37 deletions c_src/xav/xav_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,61 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
return xav_nif_raise(env, "invalid_arg_count");
}

// resolve codec
unsigned int codec_len;
if (!enif_get_atom_length(env, argv[0], &codec_len, ERL_NIF_LATIN1)) {
return xav_nif_raise(env, "failed_to_get_atom_length");
}

char *codec = (char *)XAV_ALLOC((codec_len + 1) * sizeof(char *));

if (enif_get_atom(env, argv[0], codec, codec_len + 1, ERL_NIF_LATIN1) == 0) {
return xav_nif_raise(env, "failed_to_get_atom");
}

enum AVMediaType media_type;
enum AVCodecID codec_id;
if (strcmp(codec, "opus") == 0) {
media_type = AVMEDIA_TYPE_AUDIO;
codec_id = AV_CODEC_ID_OPUS;
} else if (strcmp(codec, "vp8") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_VP8;
} else if (strcmp(codec, "h264") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_H264;
} else if (strcmp(codec, "h265") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_HEVC;
} else {
return xav_nif_raise(env, "failed_to_resolve_codec");
}

// resolve output format
unsigned int out_format_len;
if (!enif_get_atom_length(env, argv[1], &out_format_len, ERL_NIF_LATIN1)) {
return xav_nif_raise(env, "failed_to_get_atom_length");
}

char *out_format = (char *)XAV_ALLOC((out_format_len + 1) * sizeof(char *));

if (enif_get_atom(env, argv[1], out_format, out_format_len + 1, ERL_NIF_LATIN1) == 0) {
return xav_nif_raise(env, "failed_to_get_atom");
}

enum AVPixelFormat out_video_fmt = AV_PIX_FMT_NONE;
enum AVSampleFormat out_audo_fmt = AV_SAMPLE_FMT_NONE;
if (media_type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) {
out_video_fmt = av_get_pix_fmt(out_format);
if (out_video_fmt == AV_PIX_FMT_NONE) {
return xav_nif_raise(env, "unknown_out_format");
}
} else if (media_type == AVMEDIA_TYPE_AUDIO && strcmp(out_format, "nil") != 0) {
out_audo_fmt = av_get_sample_fmt(out_format);
if (out_audo_fmt == AV_SAMPLE_FMT_NONE) {
return xav_nif_raise(env, "unknown_out_format");
}
}

// resolve other params
int out_sample_rate;
if (!enif_get_int(env, argv[2], &out_sample_rate)) {
return xav_nif_raise(env, "invalid_out_sample_rate");
Expand All @@ -53,7 +86,8 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
enif_alloc_resource(xav_decoder_resource_type, sizeof(struct XavDecoder));
xav_decoder->decoder = NULL;
xav_decoder->ac = NULL;
xav_decoder->out_format = out_format;
xav_decoder->out_audio_fmt = out_audo_fmt;
xav_decoder->out_video_fmt = out_video_fmt;
xav_decoder->out_sample_rate = out_sample_rate;
xav_decoder->out_channels = out_channels;

Expand All @@ -62,31 +96,31 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
return xav_nif_raise(env, "failed_to_allocate_decoder");
}

if (decoder_init(xav_decoder->decoder, codec, xav_decoder->out_format) != 0) {
if (decoder_init(xav_decoder->decoder, media_type, codec_id) != 0) {
return xav_nif_raise(env, "failed_to_init_decoder");
}

ERL_NIF_TERM decoder_term = enif_make_resource(env, xav_decoder);
enif_release_resource(xav_decoder);

XAV_FREE(out_format);

return decoder_term;
}

ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame* frame) {
ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame *frame) {
ERL_NIF_TERM frame_term;
int ret;

if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_VIDEO) {
XAV_LOG_DEBUG("Converting video to RGB");

int out_pix_fmt = xav_decoder->decoder->out_format;

if (out_pix_fmt == AV_PIX_FMT_NONE) {
if (xav_decoder->out_video_fmt == AV_PIX_FMT_NONE) {
return xav_nif_video_frame_to_term(env, frame);
}

AVFrame *dst_frame;
ret = video_converter_convert(frame, &dst_frame, out_pix_fmt);
ret = video_converter_convert(frame, &dst_frame, xav_decoder->out_video_fmt);
if (ret <= 0) {
return xav_nif_raise(env, "failed_to_decode");
}
Expand All @@ -104,7 +138,7 @@ ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame* fr
if (xav_decoder->ac == NULL) {
ret = init_audio_converter(xav_decoder);
if (ret < 0) {
return xav_nif_raise(env, "failed_to_init_converter");;
return xav_nif_raise(env, "failed_to_init_converter");
}
}

Expand All @@ -113,15 +147,8 @@ ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame* fr
return xav_nif_raise(env, "failed_to_decode");
}

const char *out_format = av_get_sample_fmt_name(xav_decoder->ac->out_sample_fmt);

if (strcmp(out_format, "flt") == 0) {
out_format = "f32";
} else if (strcmp(out_format, "dbl") == 0) {
out_format = "f64";
}

frame_term = xav_nif_audio_frame_to_term(env, out_data, out_samples, out_size, out_format, frame->pts);
frame_term = xav_nif_audio_frame_to_term(env, out_data, out_samples, out_size,
xav_decoder->out_audio_fmt, frame->pts);

av_freep(&out_data[0]);
}
Expand Down Expand Up @@ -229,23 +256,12 @@ static int init_audio_converter(struct XavDecoder *xav_decoder) {
out_sample_rate = xav_decoder->out_sample_rate;
}

enum AVSampleFormat out_sample_fmt;
if (strcmp(xav_decoder->out_format, "u8") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_U8;
} else if (strcmp(xav_decoder->out_format, "s16") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_S16;
} else if (strcmp(xav_decoder->out_format, "s32") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_S32;
} else if (strcmp(xav_decoder->out_format, "s64") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_S64;
} else if (strcmp(xav_decoder->out_format, "f32") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_FLT;
} else if (strcmp(xav_decoder->out_format, "f64") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_DBL;
} else if (strcmp(xav_decoder->out_format, "nil") == 0) {
out_sample_fmt = av_get_alt_sample_fmt(xav_decoder->decoder->c->sample_fmt, 0);
} else {
return -1;
// If the user didn't request a specific format,
// just take the original format, but in its packed form.
// We need to call this function here because in decoder_init we don't
// yet know what the sample_fmt is.
if (xav_decoder->out_audio_fmt == AV_SAMPLE_FMT_NONE) {
xav_decoder->out_audio_fmt = av_get_alt_sample_fmt(xav_decoder->decoder->c->sample_fmt, 0);
}

struct ChannelLayout in_chlayout, out_chlayout;
Expand All @@ -267,7 +283,7 @@ static int init_audio_converter(struct XavDecoder *xav_decoder) {

return audio_converter_init(xav_decoder->ac, in_chlayout, xav_decoder->decoder->c->sample_rate,
xav_decoder->decoder->c->sample_fmt, out_chlayout, out_sample_rate,
out_sample_fmt);
xav_decoder->out_audio_fmt);
}

void free_xav_decoder(ErlNifEnv *env, void *obj) {
Expand Down
5 changes: 4 additions & 1 deletion c_src/xav/xav_decoder.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#include "audio_converter.h"
#include "decoder.h"

#include <libavutil/pixfmt.h>

// State stored in the decoder NIF resource: the underlying decoder, the
// audio converter, and the output parameters requested when the decoder
// was created.
struct XavDecoder {
// Underlying decoder; set to NULL when the resource is first allocated.
struct Decoder *decoder;
// Audio converter; starts as NULL and is initialized on demand the first
// time an audio frame is converted.
struct AudioConverter *ac;
// NOTE(review): string form of the requested output format. The two typed
// fields below appear to supersede it - confirm whether it is still needed.
char *out_format;
// Requested output pixel format. AV_PIX_FMT_NONE means video frames are
// returned as decoded, without conversion.
enum AVPixelFormat out_video_fmt;
// Requested output sample format. AV_SAMPLE_FMT_NONE means "not requested";
// it is then replaced with the packed variant of the decoder's sample
// format when the audio converter is initialized.
enum AVSampleFormat out_audio_fmt;
// Output sample rate requested by the caller.
int out_sample_rate;
// Output channel count requested by the caller - TODO confirm how an
// "unset" value is represented.
int out_channels;
};
13 changes: 3 additions & 10 deletions c_src/xav/xav_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,16 +169,9 @@ ERL_NIF_TERM next_frame(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
return xav_nif_raise(env, "failed_to_read");
}

const char *out_format = av_get_sample_fmt_name(xav_reader->ac->out_sample_fmt);

if (strcmp(out_format, "flt") == 0) {
out_format = "f32";
} else if (strcmp(out_format, "dbl") == 0) {
out_format = "f64";
}

frame_term = xav_nif_audio_frame_to_term(env, out_data, out_samples, out_size, out_format,
xav_reader->reader->frame->pts);
frame_term =
xav_nif_audio_frame_to_term(env, out_data, out_samples, out_size,
xav_reader->ac->out_sample_fmt, xav_reader->reader->frame->pts);
av_freep(&out_data[0]);
}

Expand Down
7 changes: 7 additions & 0 deletions lib/decoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,15 @@ defmodule Xav.Decoder do
:ok

{:ok, {data, format, width, height, pts}} ->
format = normalize_format(format)
{:ok, Xav.Frame.new(data, format, width, height, pts)}

# Sometimes the audio converter does not return data immediately.
{:ok, {"", _format, _samples, _pts}} ->
:ok

{:ok, {data, format, samples, pts}} ->
format = normalize_format(format)
{:ok, Xav.Frame.new(data, format, samples, pts)}

{:error, _reason} = error ->
Expand Down Expand Up @@ -123,4 +125,9 @@ defmodule Xav.Decoder do
{:error, reason} -> raise "Failed to flush decoder: #{inspect(reason)}"
end
end

# Translate FFmpeg's float sample-format names into the names Nx uses;
# every other format is passed through untouched.
defp normalize_format(format) do
  case format do
    :flt -> :f32
    :dbl -> :f64
    _ -> format
  end
end
end
Loading

0 comments on commit a26f39a

Please sign in to comment.