elixir-webrtc · gBillal · Mar 8, 2025 · Mar 30, 2025 · Mar 30, 2025 · Mar 30, 2025
diff --git a/c_src/xav/channel_layout.c b/c_src/xav/channel_layout.c
@@ -0,0 +1,34 @@
+#include "channel_layout.h"
+
+/**
+ * Resolve a channel layout name into `layout`.
+ *
+ * @param name   Layout name to parse; may be NULL, which is treated as unknown.
+ * @param layout Destination that receives the parsed layout.
+ * @return 1 on success, 0 when `name` is NULL or FFmpeg does not recognise it.
+ */
+int xav_get_channel_layout(const char *name, struct ChannelLayout *layout) {
+  // Callers pass NULL when no channel layout was configured; the FFmpeg
+  // parsers below expect a valid string, so reject it here.
+  if (name == NULL) {
+    return 0;
+  }
+
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+  if (av_channel_layout_from_string(&layout->layout, name) < 0) {
+    return 0;
+  }
+#else
+  // The legacy API reports an unknown name by returning a zero layout value.
+  layout->layout = av_get_channel_layout(name);
+  if (layout->layout == 0) {
+    return 0;
+  }
+#endif
+
+  return 1;
+}
+
+/**
+ * Apply `layout` to the codec context `ctx`.
+ *
+ * With libavutil >= 58 the layout is copied into `ctx->ch_layout` and the
+ * result of av_channel_layout_copy() is returned. On older versions the
+ * layout value is assigned to `ctx->channel_layout` and 0 is returned.
+ */
+int xav_set_channel_layout(AVCodecContext *ctx, struct ChannelLayout *layout) {
+  int status = 0;
+
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+  status = av_channel_layout_copy(&ctx->ch_layout, &layout->layout);
+#else
+  ctx->channel_layout = layout->layout;
+#endif
+
+  return status;
+}
+
+/**
+ * Apply `layout` to `frame`.
+ *
+ * With libavutil >= 58 the layout is copied into `frame->ch_layout` and the
+ * result of av_channel_layout_copy() is returned. On older versions the
+ * layout value is assigned to `frame->channel_layout` and 0 is returned.
+ */
+int xav_set_frame_channel_layout(AVFrame *frame, struct ChannelLayout *layout) {
+  int status = 0;
+
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+  status = av_channel_layout_copy(&frame->ch_layout, &layout->layout);
+#else
+  frame->channel_layout = layout->layout;
+#endif
+
+  return status;
+}
diff --git a/c_src/xav/channel_layout.h b/c_src/xav/channel_layout.h
@@ -1,6 +1,7 @@
 #ifndef CHANNEL_LAYOUT_H
 #define CHANNEL_LAYOUT_H
 #include <libavutil/channel_layout.h>
+#include <libavcodec/avcodec.h>
 
 struct ChannelLayout {
 #if LIBAVUTIL_VERSION_MAJOR >= 58
@@ -9,4 +10,8 @@ struct ChannelLayout {
   uint64_t layout;
 #endif
 };
-#endif
+
+int xav_get_channel_layout(const char *name, struct ChannelLayout *layout);
+int xav_set_channel_layout(AVCodecContext *ctx, struct ChannelLayout *layout);
+int xav_set_frame_channel_layout(AVFrame *frame, struct ChannelLayout *layout);
+#endif
diff --git a/c_src/xav/encoder.c b/c_src/xav/encoder.c
@@ -23,21 +23,27 @@ int encoder_init(struct Encoder *encoder, struct EncoderConfig *config) {
     return -1;
   }
 
-  encoder->c->width = config->width;
-  encoder->c->height = config->height;
-  encoder->c->pix_fmt = config->format;
-  encoder->c->time_base = config->time_base;
+  if (encoder->codec->type == AVMEDIA_TYPE_VIDEO) {
+    encoder->c->width = config->width;
+    encoder->c->height = config->height;
+    encoder->c->pix_fmt = config->format;
+    encoder->c->time_base = config->time_base;
 
-  if (config->profile != FF_PROFILE_UNKNOWN) {
-    encoder->c->profile = config->profile;
-  }
+    if (config->gop_size > 0) {
+      encoder->c->gop_size = config->gop_size;
+    }
 
-  if (config->gop_size > 0) {
-    encoder->c->gop_size = config->gop_size;
+    if (config->max_b_frames >= 0) {
+      encoder->c->max_b_frames = config->max_b_frames;
+    }
+  } else {
+    encoder->c->sample_fmt = config->sample_format;
+    encoder->c->sample_rate = config->sample_rate;
+    xav_set_channel_layout(encoder->c, &config->channel_layout);
   }
 
-  if (config->max_b_frames >= 0) {
-    encoder->c->max_b_frames = config->max_b_frames;
+  if (config->profile != FF_PROFILE_UNKNOWN) {
+    encoder->c->profile = config->profile;
   }
 
   AVDictionary *opts = NULL;

diff --git a/c_src/xav/encoder.h b/c_src/xav/encoder.h
@@ -1,3 +1,4 @@
+#include "channel_layout.h"
 #include "utils.h"
 #include <libavcodec/avcodec.h>
 
@@ -15,10 +16,13 @@ struct EncoderConfig {
   int width;
   int height;
   enum AVPixelFormat format;
+  enum AVSampleFormat sample_format;
   AVRational time_base;
   int gop_size;
   int max_b_frames;
   int profile;
+  int sample_rate;
+  struct ChannelLayout channel_layout;
 };
 
 struct Encoder *encoder_alloc();

diff --git a/c_src/xav/utils.c b/c_src/xav/utils.c
@@ -93,3 +93,11 @@ ERL_NIF_TERM xav_nif_packet_to_term(ErlNifEnv *env, AVPacket *packet) {
       enif_make_atom(env, packet->flags & AV_PKT_FLAG_KEY ? "true" : "false");
   return enif_make_tuple(env, 4, data_term, dts, pts, is_keyframe);
 }
+
+/**
+ * Return the channel count stored on `frame`, reading whichever field the
+ * libavutil version being compiled against provides.
+ */
+int xav_get_nb_channels(const AVFrame *frame) {
+  int nb_channels;
+
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+  nb_channels = frame->ch_layout.nb_channels;
+#else
+  nb_channels = frame->channels;
+#endif
+
+  return nb_channels;
+}
diff --git a/c_src/xav/utils.h b/c_src/xav/utils.h
@@ -26,3 +26,4 @@ ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame);
 ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples,
                                          int out_size, enum AVSampleFormat out_format, int pts);
 ERL_NIF_TERM xav_nif_packet_to_term(ErlNifEnv *env, AVPacket *packet);
+int xav_get_nb_channels(const AVFrame *frame);
diff --git a/c_src/xav/xav_encoder.c b/c_src/xav/xav_encoder.c
@@ -4,7 +4,10 @@ ErlNifResourceType *xav_encoder_resource_type;
 
 static ERL_NIF_TERM packets_to_term(ErlNifEnv *, struct Encoder *);
 static int get_profile(enum AVCodecID, const char *);
-static ERL_NIF_TERM get_codec_profiles(ErlNifEnv *, const AVCodec *);
+static ERL_NIF_TERM codec_get_profiles(ErlNifEnv *, const AVCodec *);
+static ERL_NIF_TERM codec_get_sample_formats(ErlNifEnv *, const AVCodec *);
+static ERL_NIF_TERM codec_get_sample_rates(ErlNifEnv *, const AVCodec *);
+static ERL_NIF_TERM codec_get_channel_layouts(ErlNifEnv *, const AVCodec *);
 
 ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
   if (argc != 2) {
@@ -17,6 +20,7 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
   encoder_config.profile = FF_PROFILE_UNKNOWN;
 
   char *codec_name = NULL, *format = NULL, *profile = NULL;
+  char *channel_layout = NULL;
   int codec_id = 0;
 
   ErlNifMapIterator iter;
@@ -58,6 +62,10 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
       err = xav_nif_get_string(env, value, &profile);
     } else if (strcmp(config_name, "codec_id") == 0) {
       err = enif_get_int(env, value, &codec_id);
+    } else if (strcmp(config_name, "sample_rate") == 0) {
+      err = enif_get_int(env, value, &encoder_config.sample_rate);
+    } else if (strcmp(config_name, "channel_layout") == 0) {
+      err = xav_nif_get_string(env, value, &channel_layout);
     } else {
       ret = xav_nif_raise(env, "unknown_config_key");
       goto clean;
@@ -83,10 +91,23 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
     goto clean;
   }
 
-  encoder_config.format = av_get_pix_fmt(format);
-  if (encoder_config.format == AV_PIX_FMT_NONE) {
-    ret = xav_nif_raise(env, "unknown_format");
-    goto clean;
+  if (encoder_config.codec->type == AVMEDIA_TYPE_VIDEO) {
+    encoder_config.format = av_get_pix_fmt(format);
+    if (encoder_config.format == AV_PIX_FMT_NONE) {
+      ret = xav_nif_raise(env, "unknown_format");
+      goto clean;
+    }
+  } else {
+    encoder_config.sample_format = av_get_sample_fmt(format);
+    if (encoder_config.sample_format == AV_SAMPLE_FMT_NONE) {
+      ret = xav_nif_raise(env, "unknown_format");
+      goto clean;
+    }
+
+    if (!xav_get_channel_layout(channel_layout, &encoder_config.channel_layout)) {
+      ret = xav_nif_raise(env, "unknown_channel_layout");
+      goto clean;
+    }
   }
 
   if (profile) {
@@ -100,13 +121,27 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
   struct XavEncoder *xav_encoder =
       enif_alloc_resource(xav_encoder_resource_type, sizeof(struct XavEncoder));
 
-  xav_encoder->frame = av_frame_alloc();
   xav_encoder->encoder = encoder_alloc();
   if (encoder_init(xav_encoder->encoder, &encoder_config) < 0) {
     ret = xav_nif_raise(env, "failed_to_init_encoder");
     goto clean;
   }
 
+  xav_encoder->frame = av_frame_alloc();
+
+  if (encoder_config.codec->type == AVMEDIA_TYPE_AUDIO) {
+    xav_encoder->frame->format = encoder_config.format;
+    xav_encoder->frame->nb_samples = xav_encoder->encoder->c->frame_size;
+    if (xav_set_frame_channel_layout(xav_encoder->frame, &encoder_config.channel_layout) < 0) {
+      ret = xav_nif_raise(env, "failed_to_set_channel_layout");
+      goto clean;
+    }
+    if (av_frame_get_buffer(xav_encoder->frame, 0) < 0) {
+      ret = xav_nif_raise(env, "failed_to_get_buffer");
+      goto clean;
+    }
+  }
+
   ret = enif_make_resource(env, xav_encoder);
   enif_release_resource(xav_encoder);
 
@@ -119,12 +154,16 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
     XAV_FREE(config_name);
   if (!profile)
     XAV_FREE(profile);
+  if (!channel_layout)
+    XAV_FREE(channel_layout);
   enif_map_iterator_destroy(env, &iter);
 
   return ret;
 }
 
 ERL_NIF_TERM encode(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
+  int ret;
+
   if (argc != 3) {
     return xav_nif_raise(env, "invalid_arg_count");
   }
@@ -145,15 +184,26 @@ ERL_NIF_TERM encode(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
   }
 
   AVFrame *frame = xav_encoder->frame;
-  frame->width = xav_encoder->encoder->c->width;
-  frame->height = xav_encoder->encoder->c->height;
-  frame->format = xav_encoder->encoder->c->pix_fmt;
-  frame->pts = pts;
+  if (xav_encoder->encoder->codec->type == AVMEDIA_TYPE_VIDEO) {
+    frame->width = xav_encoder->encoder->c->width;
+    frame->height = xav_encoder->encoder->c->height;
+    frame->format = xav_encoder->encoder->c->pix_fmt;
+    frame->pts = pts;
+
+    ret = av_image_fill_arrays(frame->data, frame->linesize, input.data, frame->format,
+                               frame->width, frame->height, 1);
+    if (ret < 0) {
+      return xav_nif_raise(env, "failed_to_fill_arrays");
+    }
+  } else {
+    frame->pts = pts;
+    int nb_channels = xav_get_nb_channels(frame);
+    ret = av_samples_fill_arrays(frame->data, frame->linesize, input.data, nb_channels,
+                                 frame->nb_samples, xav_encoder->encoder->c->sample_fmt, 1);
 
-  int ret = av_image_fill_arrays(frame->data, frame->linesize, input.data, frame->format,
-                                 frame->width, frame->height, 1);
-  if (ret < 0) {
-    return xav_nif_raise(env, "failed_to_fill_arrays");
+    if (ret < 0) {
+      return xav_nif_raise(env, "failed_to_fill_arrays");
+    }
   }
 
   ret = encoder_encode(xav_encoder->encoder, frame);
@@ -197,10 +247,12 @@ ERL_NIF_TERM list_encoders(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
                                    : enif_make_string(env, "", ERL_NIF_LATIN1);
       ERL_NIF_TERM media_type = enif_make_atom(env, av_get_media_type_string(codec->type));
       ERL_NIF_TERM codec_id = enif_make_int64(env, codec->id);
-      ERL_NIF_TERM profiles = get_codec_profiles(env, codec);
+      ERL_NIF_TERM profiles = codec_get_profiles(env, codec);
+      ERL_NIF_TERM sample_formats = codec_get_sample_formats(env, codec);
+      ERL_NIF_TERM sample_rates = codec_get_sample_rates(env, codec);
 
-      ERL_NIF_TERM desc =
-          enif_make_tuple6(env, codec_name, name, long_name, media_type, codec_id, profiles);
+      ERL_NIF_TERM desc = enif_make_tuple8(env, codec_name, name, long_name, media_type, codec_id,
+                                           profiles, sample_formats, sample_rates);
       result = enif_make_list_cell(env, desc, result);
     }
   }
@@ -256,7 +308,7 @@ static int get_profile(enum AVCodecID codec, const char *profile_name) {
   return profile->profile;
 }
 
-static ERL_NIF_TERM get_codec_profiles(ErlNifEnv *env, const AVCodec *codec) {
+static ERL_NIF_TERM codec_get_profiles(ErlNifEnv *env, const AVCodec *codec) {
   ERL_NIF_TERM result = enif_make_list(env, 0);
 
   const AVCodecDescriptor *desc = avcodec_descriptor_get(codec->id);
@@ -276,6 +328,41 @@ static ERL_NIF_TERM get_codec_profiles(ErlNifEnv *env, const AVCodec *codec) {
   return result;
 }
 
+/**
+ * Build an Erlang list of the sample format names `codec` supports.
+ *
+ * Returns an empty list for non-audio codecs and for codecs whose
+ * `sample_fmts` array is NULL (no formats advertised). Each name is
+ * prepended, so the list is in reverse order of the codec's array.
+ */
+static ERL_NIF_TERM codec_get_sample_formats(ErlNifEnv *env, const AVCodec *codec) {
+  ERL_NIF_TERM result = enif_make_list(env, 0);
+
+  // sample_fmts may be NULL; guard it the same way codec_get_sample_rates
+  // guards supported_samplerates.
+  if (codec->type != AVMEDIA_TYPE_AUDIO || codec->sample_fmts == NULL) {
+    return result;
+  }
+
+  // The array is terminated by AV_SAMPLE_FMT_NONE.
+  for (const enum AVSampleFormat *sample_format = codec->sample_fmts;
+       *sample_format != AV_SAMPLE_FMT_NONE; sample_format++) {
+    ERL_NIF_TERM format_name = enif_make_atom(env, av_get_sample_fmt_name(*sample_format));
+    result = enif_make_list_cell(env, format_name, result);
+  }
+
+  return result;
+}
+
+/**
+ * Build an Erlang list of the sample rates `codec` supports.
+ *
+ * Returns an empty list for non-audio codecs and when
+ * `supported_samplerates` is NULL. Each rate is prepended, so the list is in
+ * reverse order of the codec's zero-terminated array.
+ */
+static ERL_NIF_TERM codec_get_sample_rates(ErlNifEnv *env, const AVCodec *codec) {
+  ERL_NIF_TERM rates = enif_make_list(env, 0);
+
+  const int is_audio = codec->type == AVMEDIA_TYPE_AUDIO;
+  if (!is_audio || codec->supported_samplerates == NULL) {
+    return rates;
+  }
+
+  for (const int *rate = codec->supported_samplerates; *rate != 0; rate++) {
+    rates = enif_make_list_cell(env, enif_make_int(env, *rate), rates);
+  }
+
+  return rates;
+}
+
 static ErlNifFunc xav_funcs[] = {{"new", 2, new},
                                  {"encode", 3, encode},
                                  {"flush", 1, flush},

diff --git a/c_src/xav/xav_encoder.h b/c_src/xav/xav_encoder.h
@@ -1,3 +1,4 @@
+#include "channel_layout.c"
-#include "channel_layout.c"
+#include "channel_layout.h"
-#include "channel_layout.c"
+#include "channel_layout.h"
 #include "encoder.h"
 #include "utils.h"
 #include <libavutil/pixfmt.h>

diff --git a/lib/xav.ex b/lib/xav.ex
@@ -6,7 +6,8 @@ defmodule Xav do
           name: atom(),
           long_name: String.t(),
           media_type: atom(),
-          profiles: [String.t()]
+          profiles: [String.t()],
+          sample_formats: [atom()]
         }
 
   @type decoder :: %{
@@ -60,13 +61,16 @@ defmodule Xav do
   @spec list_encoders() :: [encoder()]
   def list_encoders() do
     Xav.Encoder.NIF.list_encoders()
-    |> Enum.map(fn {family_name, name, long_name, media_type, _codec_id, profiles} ->
+    |> Enum.map(fn {family_name, name, long_name, media_type, _codec_id, profiles, sample_formats,
+                    sample_rates} ->
       %{
         codec: family_name,
         name: name,
         long_name: List.to_string(long_name),
         media_type: media_type,
-        profiles: profiles |> Enum.map(&List.to_string/1) |> Enum.reverse()
+        profiles: profiles |> Enum.map(&List.to_string/1) |> Enum.reverse(),
+        sample_formats: Enum.reverse(sample_formats),
+        sample_rates: Enum.reverse(sample_rates)
       }
     end)
     |> Enum.reverse()