FFmpeg  4.4.4
Data Structures | Macros | Functions | Variables
af_speechnorm.c File Reference

Speech Normalizer. More...

#include <float.h>
#include "libavutil/avassert.h"
#include "libavutil/opt.h"
#include "bufferqueue.h"
#include "audio.h"
#include "avfilter.h"
#include "filters.h"
#include "internal.h"

Go to the source code of this file.

Data Structures

struct  PeriodItem
 
struct  ChannelContext
 
struct  SpeechNormalizerContext
 

Macros

#define FF_BUFQUEUE_SIZE   (1024)
 
#define MAX_ITEMS   882000
 
#define MIN_PEAK   (1. / 32768.)
 
#define OFFSET(x)   offsetof(SpeechNormalizerContext, x)
 
#define FLAGS   AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
 
#define ANALYZE_CHANNEL(name, ptype, zero)
 
#define FILTER_CHANNELS(name, ptype)
 
#define FILTER_LINK_CHANNELS(name, ptype)
 

Functions

 AVFILTER_DEFINE_CLASS (speechnorm)
 
static int query_formats (AVFilterContext *ctx)
 
static int get_pi_samples (PeriodItem *pi, int start, int end, int remain)
 
static int available_samples (AVFilterContext *ctx)
 
static void consume_pi (ChannelContext *cc, int nb_samples)
 
static double next_gain (AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
 
static void next_pi (AVFilterContext *ctx, ChannelContext *cc, int bypass)
 
static double min_gain (AVFilterContext *ctx, ChannelContext *cc, int max_size)
 
static double lerp (double min, double max, double mix)
 
static int filter_frame (AVFilterContext *ctx)
 
static int activate (AVFilterContext *ctx)
 
static int config_input (AVFilterLink *inlink)
 
static int process_command (AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
 
static av_cold void uninit (AVFilterContext *ctx)
 

Variables

static const AVOption speechnorm_options []
 
static const AVFilterPad inputs []
 
static const AVFilterPad outputs []
 
AVFilter ff_af_speechnorm
 

Detailed Description

Speech Normalizer.

Definition in file af_speechnorm.c.

Macro Definition Documentation

◆ FF_BUFQUEUE_SIZE

#define FF_BUFQUEUE_SIZE   (1024)

Definition at line 33 of file af_speechnorm.c.

◆ MAX_ITEMS

#define MAX_ITEMS   882000

Definition at line 41 of file af_speechnorm.c.

◆ MIN_PEAK

#define MIN_PEAK   (1. / 32768.)

Definition at line 42 of file af_speechnorm.c.

◆ OFFSET

#define OFFSET (   x)    offsetof(SpeechNormalizerContext, x)

Definition at line 89 of file af_speechnorm.c.

◆ FLAGS

#define FLAGS   AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM

Definition at line 90 of file af_speechnorm.c.

◆ ANALYZE_CHANNEL

#define ANALYZE_CHANNEL (   name,
  ptype,
  zero 
)

Definition at line 251 of file af_speechnorm.c.

◆ FILTER_CHANNELS

#define FILTER_CHANNELS (   name,
  ptype 
)
Value:
static void filter_channels_## name (AVFilterContext *ctx, \
AVFrame *in, int nb_samples) \
{ \
SpeechNormalizerContext *s = ctx->priv; \
AVFilterLink *inlink = ctx->inputs[0]; \
for (int ch = 0; ch < inlink->channels; ch++) { \
ChannelContext *cc = &s->cc[ch]; \
ptype *dst = (ptype *)in->extended_data[ch]; \
const int bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
int n = 0; \
\
while (n < nb_samples) { \
ptype gain; \
int size; \
next_pi(ctx, cc, bypass); \
size = FFMIN(nb_samples - n, cc->pi_size); \
av_assert0(size > 0); \
gain = cc->gain_state; \
consume_pi(cc, size); \
for (int i = n; i < n + size; i++) \
dst[i] *= gain; \
n += size; \
} \
} \
}
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
uint8_t pi
Definition: audio_convert.c (unrelated cross-reference picked up by the documentation generator; the original tooltip text was garbled and truncated during extraction)
#define s(width, name)
Definition: cbs_vp9.c:257
#define FFMIN(a, b)
Definition: common.h:105
uint64_t av_channel_layout_extract_channel(uint64_t channel_layout, int index)
Get the channel with the given index in channel_layout.
for(j=16;j >0;--j)
int i
Definition: input.c:407
const char * name
Definition: qsvenc.c:46
An instance of a filter.
Definition: avfilter.h:341
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
AVFormatContext * ctx
Definition: movenc.c:48
int size

Definition at line 308 of file af_speechnorm.c.

◆ FILTER_LINK_CHANNELS

#define FILTER_LINK_CHANNELS (   name,
  ptype 
)

Definition at line 345 of file af_speechnorm.c.

Function Documentation

◆ AVFILTER_DEFINE_CLASS()

AVFILTER_DEFINE_CLASS ( speechnorm  )

◆ query_formats()

static int query_formats ( AVFilterContext * ctx)
static

Definition at line 116 of file af_speechnorm.c.

◆ get_pi_samples()

static int get_pi_samples ( PeriodItem * pi,
int  start,
int  end,
int  remain 
)
static

Definition at line 146 of file af_speechnorm.c.

Referenced by available_samples().

◆ available_samples()

static int available_samples ( AVFilterContext * ctx)
static

Definition at line 167 of file af_speechnorm.c.

Referenced by activate(), and filter_frame().

◆ consume_pi()

static void consume_pi ( ChannelContext * cc,
int  nb_samples 
)
static

Definition at line 183 of file af_speechnorm.c.

◆ next_gain()

static double next_gain ( AVFilterContext * ctx,
double  pi_max_peak,
int  bypass,
double  state 
)
static

Definition at line 192 of file af_speechnorm.c.

Referenced by min_gain(), and next_pi().

◆ next_pi()

static void next_pi ( AVFilterContext * ctx,
ChannelContext * cc,
int  bypass 
)
static

Definition at line 208 of file af_speechnorm.c.

◆ min_gain()

static double min_gain ( AVFilterContext * ctx,
ChannelContext * cc,
int  max_size 
)
static

Definition at line 228 of file af_speechnorm.c.

◆ lerp()

static double lerp ( double  min,
double  max,
double  mix 
)
static

Definition at line 340 of file af_speechnorm.c.

◆ filter_frame()

static int filter_frame ( AVFilterContext * ctx)
static

Definition at line 399 of file af_speechnorm.c.

Referenced by activate().

◆ activate()

static int activate ( AVFilterContext * ctx)
static

Definition at line 450 of file af_speechnorm.c.

◆ config_input()

static int config_input ( AVFilterLink * inlink)
static

Definition at line 490 of file af_speechnorm.c.

◆ process_command()

static int process_command ( AVFilterContext * ctx,
const char *  cmd,
const char *  args,
char *  res,
int  res_len,
int  flags 
)
static

Definition at line 527 of file af_speechnorm.c.

◆ uninit()

static av_cold void uninit ( AVFilterContext * ctx)
static

Definition at line 543 of file af_speechnorm.c.

Variable Documentation

◆ speechnorm_options

const AVOption speechnorm_options[]
static
Initial value:
= {
{ "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
{ "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
{ "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
{ "e", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
{ "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
{ "c", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
{ "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
{ "t", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
{ "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
{ "r", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
{ "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
{ "f", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
{ "channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
{ "h", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
{ "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ NULL }
}
#define FLAGS
Definition: af_speechnorm.c:90
#define OFFSET(x)
Definition: af_speechnorm.c:89
channels
Definition: aptx.h:33
static void invert(float *h, int n)
Definition: asrc_sinc.c:201
#define NULL
Definition: coverity.c:32
@ AV_OPT_TYPE_CHANNEL_LAYOUT
Definition: opt.h:241
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:227
@ AV_OPT_TYPE_BOOL
Definition: opt.h:242

Definition at line 92 of file af_speechnorm.c.

◆ inputs

const AVFilterPad inputs[]
static
Initial value:
= {
{
.name = "default",
.type = AVMEDIA_TYPE_AUDIO,
.config_props = config_input,
},
{ NULL }
}
static int config_input(AVFilterLink *inlink)
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202

Definition at line 551 of file af_speechnorm.c.

◆ outputs

const AVFilterPad outputs[]
static
Initial value:
= {
{
.name = "default",
.type = AVMEDIA_TYPE_AUDIO,
},
{ NULL }
}

Definition at line 560 of file af_speechnorm.c.

◆ ff_af_speechnorm

AVFilter ff_af_speechnorm
Initial value:
= {
.name = "speechnorm",
.description = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
.query_formats = query_formats,
.priv_size = sizeof(SpeechNormalizerContext),
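.priv_class = &speechnorm_class,
.activate = activate,
.uninit = uninit,
.inputs = inputs,
.outputs = outputs,
.process_command = process_command,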
}
static int query_formats(AVFilterContext *ctx)
static const AVFilterPad inputs[]
static const AVFilterPad outputs[]
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
static int activate(AVFilterContext *ctx)
static av_cold void uninit(AVFilterContext *ctx)
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117

Definition at line 568 of file af_speechnorm.c.