FFmpeg  4.4.4
af_speechnorm.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Paul B Mahol
3  *
4  * Speech Normalizer
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * Speech Normalizer
26  */
27 
28 #include <float.h>
29 
30 #include "libavutil/avassert.h"
31 #include "libavutil/opt.h"
32 
33 #define FF_BUFQUEUE_SIZE (1024)
34 #include "bufferqueue.h"
35 
36 #include "audio.h"
37 #include "avfilter.h"
38 #include "filters.h"
39 #include "internal.h"
40 
41 #define MAX_ITEMS 882000
42 #define MIN_PEAK (1. / 32768.)
43 
/* One half-period of the signal: a run of consecutive samples that stay on
 * one side of zero (see ANALYZE_CHANNEL). */
typedef struct PeriodItem {
    int size;        /* number of samples accumulated in this half-period */
    int type;        /* 0 = still being filled, 1 = finalized, ready for gain use */
    double max_peak; /* largest absolute sample value seen in this half-period */
} PeriodItem;
49 
50 typedef struct ChannelContext {
51  int state;
52  int bypass;
54  double gain_state;
55  double pi_max_peak;
56  int pi_start;
57  int pi_end;
58  int pi_size;
60 
61 typedef struct SpeechNormalizerContext {
62  const AVClass *class;
63 
64  double peak_value;
65  double max_expansion;
68  double raise_amount;
69  double fall_amount;
70  uint64_t channels;
71  int invert;
72  int link;
73 
75  double prev_gain;
76 
78  int eof;
79  int64_t pts;
80 
81  struct FFBufQueue queue;
82 
84  const uint8_t *srcp, int nb_samples);
86  AVFrame *in, int nb_samples);
88 
89 #define OFFSET(x) offsetof(SpeechNormalizerContext, x)
90 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
91 
/* User options; each long name has a one-letter alias.  All carry
 * AV_OPT_FLAG_RUNTIME_PARAM (see FLAGS), so they may be changed while the
 * filter runs via process_command().  Note "channels" uses "h" as its short
 * alias. */
static const AVOption speechnorm_options[] = {
    { "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    { "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    { "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "e", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "c", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    { "t", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    { "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "r", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "f", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
    { "h", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
    { "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { NULL }
};
113 
115 
117 {
120  static const enum AVSampleFormat sample_fmts[] = {
123  };
124  int ret;
125 
127  if (!layouts)
128  return AVERROR(ENOMEM);
130  if (ret < 0)
131  return ret;
132 
134  if (!formats)
135  return AVERROR(ENOMEM);
137  if (ret < 0)
138  return ret;
139 
141  if (!formats)
142  return AVERROR(ENOMEM);
144 }
145 
146 static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
147 {
148  int sum;
149 
150  if (pi[start].type == 0)
151  return remain;
152 
153  sum = remain;
154  while (start != end) {
155  start++;
156  if (start >= MAX_ITEMS)
157  start = 0;
158  if (pi[start].type == 0)
159  break;
160  av_assert0(pi[start].size > 0);
161  sum += pi[start].size;
162  }
163 
164  return sum;
165 }
166 
168 {
169  SpeechNormalizerContext *s = ctx->priv;
170  AVFilterLink *inlink = ctx->inputs[0];
171  int min_pi_nb_samples;
172 
173  min_pi_nb_samples = get_pi_samples(s->cc[0].pi, s->cc[0].pi_start, s->cc[0].pi_end, s->cc[0].pi_size);
174  for (int ch = 1; ch < inlink->channels && min_pi_nb_samples > 0; ch++) {
175  ChannelContext *cc = &s->cc[ch];
176 
177  min_pi_nb_samples = FFMIN(min_pi_nb_samples, get_pi_samples(cc->pi, cc->pi_start, cc->pi_end, cc->pi_size));
178  }
179 
180  return min_pi_nb_samples;
181 }
182 
183 static void consume_pi(ChannelContext *cc, int nb_samples)
184 {
185  if (cc->pi_size >= nb_samples) {
186  cc->pi_size -= nb_samples;
187  } else {
188  av_assert0(0);
189  }
190 }
191 
192 static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
193 {
194  SpeechNormalizerContext *s = ctx->priv;
195  const double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
196  const double compression = 1. / s->max_compression;
197  const int type = s->invert ? pi_max_peak <= s->threshold_value : pi_max_peak >= s->threshold_value;
198 
199  if (bypass) {
200  return 1.;
201  } else if (type) {
202  return FFMIN(expansion, state + s->raise_amount);
203  } else {
204  return FFMIN(expansion, FFMAX(compression, state - s->fall_amount));
205  }
206 }
207 
208 static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
209 {
210  av_assert0(cc->pi_size >= 0);
211  if (cc->pi_size == 0) {
212  SpeechNormalizerContext *s = ctx->priv;
213  int start = cc->pi_start;
214 
215  av_assert0(cc->pi[start].size > 0);
216  av_assert0(cc->pi[start].type > 0 || s->eof);
217  cc->pi_size = cc->pi[start].size;
218  cc->pi_max_peak = cc->pi[start].max_peak;
219  av_assert0(cc->pi_start != cc->pi_end || s->eof);
220  start++;
221  if (start >= MAX_ITEMS)
222  start = 0;
223  cc->pi_start = start;
224  cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state);
225  }
226 }
227 
228 static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
229 {
230  SpeechNormalizerContext *s = ctx->priv;
231  double min_gain = s->max_expansion;
232  double gain_state = cc->gain_state;
233  int size = cc->pi_size;
234  int idx = cc->pi_start;
235 
236  min_gain = FFMIN(min_gain, gain_state);
237  while (size <= max_size) {
238  if (idx == cc->pi_end)
239  break;
240  gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state);
241  min_gain = FFMIN(min_gain, gain_state);
242  size += cc->pi[idx].size;
243  idx++;
244  if (idx >= MAX_ITEMS)
245  idx = 0;
246  }
247 
248  return min_gain;
249 }
250 
/* Segment one channel into half-periods (runs of samples that stay on one
 * side of zero), recording each run's length and absolute peak in the
 * channel's PeriodItem ring buffer.  A run ends on a zero crossing or when
 * it exceeds s->max_period samples; it is committed (pi_end advances) only
 * if its peak reached MIN_PEAK or it was overlong.  The code is too
 * order-sensitive to restyle safely, so only comments are added. */
#define ANALYZE_CHANNEL(name, ptype, zero)                                  \
static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc, \
                                     const uint8_t *srcp, int nb_samples)   \
{                                                                           \
    SpeechNormalizerContext *s = ctx->priv;                                 \
    const ptype *src = (const ptype *)srcp;                                 \
    int n = 0;                                                              \
                                                                            \
    /* First call on this channel: latch the initial polarity. */           \
    if (cc->state < 0)                                                      \
        cc->state = src[0] >= zero;                                         \
                                                                            \
    while (n < nb_samples) {                                                \
        /* Period boundary: polarity flipped, or the run grew too long. */  \
        if ((cc->state != (src[n] >= zero)) ||                              \
            (cc->pi[cc->pi_end].size > s->max_period)) {                    \
            double max_peak = cc->pi[cc->pi_end].max_peak;                  \
            int state = cc->state;                                          \
            cc->state = src[n] >= zero;                                     \
            av_assert0(cc->pi[cc->pi_end].size > 0);                        \
            if (cc->pi[cc->pi_end].max_peak >= MIN_PEAK ||                  \
                cc->pi[cc->pi_end].size > s->max_period) {                  \
                /* Finalize this period and start a fresh ring slot. */     \
                cc->pi[cc->pi_end].type = 1;                                \
                cc->pi_end++;                                               \
                if (cc->pi_end >= MAX_ITEMS)                                \
                    cc->pi_end = 0;                                         \
                /* On a real polarity change the new slot starts from a     \
                 * near-zero peak; an overlong split keeps the old peak. */ \
                if (cc->state != state)                                     \
                    cc->pi[cc->pi_end].max_peak = DBL_MIN;                  \
                else                                                        \
                    cc->pi[cc->pi_end].max_peak = max_peak;                 \
                cc->pi[cc->pi_end].type = 0;                                \
                cc->pi[cc->pi_end].size = 0;                                \
                av_assert0(cc->pi_end != cc->pi_start);                     \
            }                                                               \
        }                                                                   \
                                                                            \
        /* Accumulate size and |peak| while polarity is unchanged. */      \
        if (cc->state) {                                                    \
            while (src[n] >= zero) {                                        \
                cc->pi[cc->pi_end].max_peak = FFMAX(cc->pi[cc->pi_end].max_peak, src[n]); \
                cc->pi[cc->pi_end].size++;                                  \
                n++;                                                        \
                if (n >= nb_samples)                                        \
                    break;                                                  \
            }                                                               \
        } else {                                                            \
            while (src[n] < zero) {                                         \
                cc->pi[cc->pi_end].max_peak = FFMAX(cc->pi[cc->pi_end].max_peak, -src[n]); \
                cc->pi[cc->pi_end].size++;                                  \
                n++;                                                        \
                if (n >= nb_samples)                                        \
                    break;                                                  \
            }                                                               \
        }                                                                   \
    }                                                                       \
}

ANALYZE_CHANNEL(dbl, double, 0.0)
ANALYZE_CHANNEL(flt, float, 0.f)
307 
308 #define FILTER_CHANNELS(name, ptype) \
309 static void filter_channels_## name (AVFilterContext *ctx, \
310  AVFrame *in, int nb_samples) \
311 { \
312  SpeechNormalizerContext *s = ctx->priv; \
313  AVFilterLink *inlink = ctx->inputs[0]; \
314  \
315  for (int ch = 0; ch < inlink->channels; ch++) { \
316  ChannelContext *cc = &s->cc[ch]; \
317  ptype *dst = (ptype *)in->extended_data[ch]; \
318  const int bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
319  int n = 0; \
320  \
321  while (n < nb_samples) { \
322  ptype gain; \
323  int size; \
324  \
325  next_pi(ctx, cc, bypass); \
326  size = FFMIN(nb_samples - n, cc->pi_size); \
327  av_assert0(size > 0); \
328  gain = cc->gain_state; \
329  consume_pi(cc, size); \
330  for (int i = n; i < n + size; i++) \
331  dst[i] *= gain; \
332  n += size; \
333  } \
334  } \
335 }
336 
337 FILTER_CHANNELS(dbl, double)
338 FILTER_CHANNELS(flt, float)
339 
/* Linear interpolation between a and b: returns a at t == 0, b at t == 1. */
static double lerp(double a, double b, double t)
{
    return a + (b - a) * t;
}
344 
/* Linked filtering: all channels share one gain so the channel balance is
 * preserved.  For each span the minimum look-ahead gain over all
 * non-bypassed channels is chosen, and the applied gain is linearly
 * interpolated from the previous span's gain to avoid audible steps.
 * The span bookkeeping is order-sensitive, so only comments are added. */
#define FILTER_LINK_CHANNELS(name, ptype)                                   \
static void filter_link_channels_## name (AVFilterContext *ctx,             \
                                          AVFrame *in, int nb_samples)      \
{                                                                           \
    SpeechNormalizerContext *s = ctx->priv;                                 \
    AVFilterLink *inlink = ctx->inputs[0];                                  \
    int n = 0;                                                              \
                                                                            \
    while (n < nb_samples) {                                                \
        int min_size = nb_samples - n;                                      \
        int max_size = 1;                                                   \
        ptype gain = s->max_expansion;                                      \
                                                                            \
        /* Advance every channel and find the largest span (min_size) all   \
         * channels can cover with their current period. */                 \
        for (int ch = 0; ch < inlink->channels; ch++) {                     \
            ChannelContext *cc = &s->cc[ch];                                \
                                                                            \
            cc->bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
                                                                            \
            next_pi(ctx, cc, cc->bypass);                                   \
            min_size = FFMIN(min_size, cc->pi_size);                        \
            max_size = FFMAX(max_size, cc->pi_size);                        \
        }                                                                   \
                                                                            \
        av_assert0(min_size > 0);                                           \
        /* Shared gain = the most conservative look-ahead gain. */          \
        for (int ch = 0; ch < inlink->channels; ch++) {                     \
            ChannelContext *cc = &s->cc[ch];                                \
                                                                            \
            if (cc->bypass)                                                 \
                continue;                                                   \
            gain = FFMIN(gain, min_gain(ctx, cc, max_size));                \
        }                                                                   \
                                                                            \
        for (int ch = 0; ch < inlink->channels; ch++) {                     \
            ChannelContext *cc = &s->cc[ch];                                \
            ptype *dst = (ptype *)in->extended_data[ch];                    \
                                                                            \
            /* Bypassed channels still consume samples to stay in sync. */  \
            consume_pi(cc, min_size);                                       \
            if (cc->bypass)                                                 \
                continue;                                                   \
                                                                            \
            /* Ramp from the previous span's gain to the new one. */        \
            for (int i = n; i < n + min_size; i++) {                        \
                ptype g = lerp(s->prev_gain, gain, (i - n) / (double)min_size); \
                dst[i] *= g;                                                \
            }                                                               \
        }                                                                   \
                                                                            \
        s->prev_gain = gain;                                                \
        n += min_size;                                                      \
    }                                                                       \
}

FILTER_LINK_CHANNELS(dbl, double)
FILTER_LINK_CHANNELS(flt, float)
398 
400 {
401  SpeechNormalizerContext *s = ctx->priv;
402  AVFilterLink *outlink = ctx->outputs[0];
403  AVFilterLink *inlink = ctx->inputs[0];
404  int ret;
405 
406  while (s->queue.available > 0) {
407  int min_pi_nb_samples;
408  AVFrame *in;
409 
410  in = ff_bufqueue_peek(&s->queue, 0);
411  if (!in)
412  break;
413 
414  min_pi_nb_samples = available_samples(ctx);
415  if (min_pi_nb_samples < in->nb_samples && !s->eof)
416  break;
417 
418  in = ff_bufqueue_get(&s->queue);
419 
421 
422  s->filter_channels[s->link](ctx, in, in->nb_samples);
423 
424  s->pts = in->pts + in->nb_samples;
425 
426  return ff_filter_frame(outlink, in);
427  }
428 
429  for (int f = 0; f < ff_inlink_queued_frames(inlink); f++) {
430  AVFrame *in;
431 
432  ret = ff_inlink_consume_frame(inlink, &in);
433  if (ret < 0)
434  return ret;
435  if (ret == 0)
436  break;
437 
438  ff_bufqueue_add(ctx, &s->queue, in);
439 
440  for (int ch = 0; ch < inlink->channels; ch++) {
441  ChannelContext *cc = &s->cc[ch];
442 
443  s->analyze_channel(ctx, cc, in->extended_data[ch], in->nb_samples);
444  }
445  }
446 
447  return 1;
448 }
449 
451 {
452  AVFilterLink *inlink = ctx->inputs[0];
453  AVFilterLink *outlink = ctx->outputs[0];
454  SpeechNormalizerContext *s = ctx->priv;
455  int ret, status;
456  int64_t pts;
457 
458  FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
459 
460  ret = filter_frame(ctx);
461  if (ret <= 0)
462  return ret;
463 
464  if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
465  if (status == AVERROR_EOF)
466  s->eof = 1;
467  }
468 
469  if (s->eof && ff_inlink_queued_samples(inlink) == 0 &&
470  s->queue.available == 0) {
471  ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
472  return 0;
473  }
474 
475  if (s->queue.available > 0) {
476  AVFrame *in = ff_bufqueue_peek(&s->queue, 0);
477  const int nb_samples = available_samples(ctx);
478 
479  if (nb_samples >= in->nb_samples || s->eof) {
481  return 0;
482  }
483  }
484 
485  FF_FILTER_FORWARD_WANTED(outlink, inlink);
486 
487  return FFERROR_NOT_READY;
488 }
489 
490 static int config_input(AVFilterLink *inlink)
491 {
492  AVFilterContext *ctx = inlink->dst;
493  SpeechNormalizerContext *s = ctx->priv;
494 
495  s->max_period = inlink->sample_rate / 10;
496 
497  s->prev_gain = 1.;
498  s->cc = av_calloc(inlink->channels, sizeof(*s->cc));
499  if (!s->cc)
500  return AVERROR(ENOMEM);
501 
502  for (int ch = 0; ch < inlink->channels; ch++) {
503  ChannelContext *cc = &s->cc[ch];
504 
505  cc->state = -1;
506  cc->gain_state = 1.;
507  }
508 
509  switch (inlink->format) {
510  case AV_SAMPLE_FMT_FLTP:
511  s->analyze_channel = analyze_channel_flt;
512  s->filter_channels[0] = filter_channels_flt;
513  s->filter_channels[1] = filter_link_channels_flt;
514  break;
515  case AV_SAMPLE_FMT_DBLP:
516  s->analyze_channel = analyze_channel_dbl;
517  s->filter_channels[0] = filter_channels_dbl;
518  s->filter_channels[1] = filter_link_channels_dbl;
519  break;
520  default:
521  av_assert0(0);
522  }
523 
524  return 0;
525 }
526 
527 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
528  char *res, int res_len, int flags)
529 {
530  SpeechNormalizerContext *s = ctx->priv;
531  int link = s->link;
532  int ret;
533 
534  ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
535  if (ret < 0)
536  return ret;
537  if (link != s->link)
538  s->prev_gain = 1.;
539 
540  return 0;
541 }
542 
544 {
545  SpeechNormalizerContext *s = ctx->priv;
546 
547  ff_bufqueue_discard_all(&s->queue);
548  av_freep(&s->cc);
549 }
550 
/* Single audio input; config_input allocates per-channel state and binds
 * the format-specific function pointers. */
static const AVFilterPad inputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
    },
    { NULL }
};
559 
/* Single audio output; frames are pushed from filter_frame(). */
static const AVFilterPad outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
    },
    { NULL }
};
567 
569  .name = "speechnorm",
570  .description = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
571  .query_formats = query_formats,
572  .priv_size = sizeof(SpeechNormalizerContext),
573  .priv_class = &speechnorm_class,
574  .activate = activate,
575  .uninit = uninit,
576  .inputs = inputs,
577  .outputs = outputs,
579 };
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:925
#define ANALYZE_CHANNEL(name, ptype, zero)
static int available_samples(AVFilterContext *ctx)
#define MAX_ITEMS
Definition: af_speechnorm.c:41
#define FILTER_LINK_CHANNELS(name, ptype)
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
static int query_formats(AVFilterContext *ctx)
static void consume_pi(ChannelContext *cc, int nb_samples)
static const AVOption speechnorm_options[]
Definition: af_speechnorm.c:92
static int config_input(AVFilterLink *inlink)
#define FLAGS
Definition: af_speechnorm.c:90
static const AVFilterPad inputs[]
AVFilter ff_af_speechnorm
static const AVFilterPad outputs[]
static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
static int activate(AVFilterContext *ctx)
static av_cold void uninit(AVFilterContext *ctx)
static int filter_frame(AVFilterContext *ctx)
static double lerp(double min, double max, double mix)
#define FILTER_CHANNELS(name, ptype)
#define OFFSET(x)
Definition: af_speechnorm.c:89
AVFILTER_DEFINE_CLASS(speechnorm)
static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
channels
Definition: aptx.h:33
static void invert(float *h, int n)
Definition: asrc_sinc.c:201
#define av_cold
Definition: attributes.h:88
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
uint8_t
simple assert() macros that are a bit more flexible than ISO C assert().
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1449
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1096
int ff_filter_process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Generic processing of user supplied commands that are set in the same way as the filter options.
Definition: avfilter.c:882
size_t ff_inlink_queued_frames(AVFilterLink *link)
Get the number of frames available on the link.
Definition: avfilter.c:1464
void ff_filter_set_ready(AVFilterContext *filter, unsigned priority)
Mark a filter ready and schedule it for activation.
Definition: avfilter.c:193
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1494
int ff_inlink_queued_samples(AVFilterLink *link)
Definition: avfilter.c:1474
Main libavfilter public API header.
static void ff_bufqueue_add(void *log, struct FFBufQueue *queue, AVFrame *buf)
Add a buffer to the queue.
Definition: bufferqueue.h:71
static AVFrame * ff_bufqueue_peek(struct FFBufQueue *queue, unsigned index)
Get a buffer from the queue without altering it.
Definition: bufferqueue.h:87
static void ff_bufqueue_discard_all(struct FFBufQueue *queue)
Unref and remove all buffers from the queue.
Definition: bufferqueue.h:111
static AVFrame * ff_bufqueue_get(struct FFBufQueue *queue)
Get the first buffer from the queue and remove it.
Definition: bufferqueue.h:98
#define flags(name, subs,...)
Definition: cbs_av1.c:561
#define s(width, name)
Definition: cbs_vp9.c:257
#define f(width, name)
Definition: cbs_vp9.c:255
static struct @321 state
#define FFMIN(a, b)
Definition: common.h:105
#define FFMAX(a, b)
Definition: common.h:103
#define NULL
Definition: coverity.c:32
#define max(a, b)
Definition: cuda_runtime.h:33
#define FF_FILTER_FORWARD_WANTED(outlink, inlink)
Forward the frame_wanted_out flag from an output link to an input link.
Definition: filters.h:254
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:189
#define FFERROR_NOT_READY
Filters implementation helper functions.
Definition: filters.h:34
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:199
AVFilterChannelLayouts * ff_all_channel_counts(void)
Construct an AVFilterChannelLayouts coding for any channel layout, with known or unknown disposition.
Definition: formats.c:436
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:587
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:286
int ff_set_common_samplerates(AVFilterContext *ctx, AVFilterFormats *samplerates)
Definition: formats.c:575
int ff_set_common_channel_layouts(AVFilterContext *ctx, AVFilterChannelLayouts *channel_layouts)
A helper for query_formats() which sets all links to the same list of channel layouts/sample rates.
Definition: formats.c:568
AVFilterFormats * ff_all_samplerates(void)
Definition: formats.c:421
@ AV_OPT_TYPE_CHANNEL_LAYOUT
Definition: opt.h:241
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:227
@ AV_OPT_TYPE_BOOL
Definition: opt.h:242
#define AVERROR_EOF
End of file.
Definition: error.h:55
#define AVERROR(e)
Definition: error.h:43
int av_frame_make_writable(AVFrame *frame)
Ensure that the frame data is writable, avoiding data copy if possible.
Definition: frame.c:611
void * av_calloc(size_t nmemb, size_t size)
Non-inlined equivalent of av_mallocz_array().
Definition: mem.c:245
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
@ AV_SAMPLE_FMT_FLTP
float, planar
Definition: samplefmt.h:69
@ AV_SAMPLE_FMT_NONE
Definition: samplefmt.h:59
@ AV_SAMPLE_FMT_DBLP
double, planar
Definition: samplefmt.h:70
cl_device_type type
static int mix(int c0, int c1)
Definition: 4xm.c:715
common internal API header
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
enum MovChannelLayoutTag * layouts
Definition: mov_chan.c:434
AVOptions.
typedef void(RENAME(mix_any_func_type))
formats
Definition: signature.h:48
Describe the class of an AVClass context structure.
Definition: log.h:67
A list of supported channel layouts.
Definition: formats.h:86
An instance of a filter.
Definition: avfilter.h:341
A list of supported formats for one end of a filter link.
Definition: formats.h:65
A filter pad used for either input or output.
Definition: internal.h:54
const char * name
Pad name.
Definition: internal.h:60
Filter definition.
Definition: avfilter.h:145
const char * name
Filter name.
Definition: avfilter.h:149
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
AVOption.
Definition: opt.h:248
PeriodItem pi[MAX_ITEMS]
Definition: af_speechnorm.c:53
double pi_max_peak
Definition: af_speechnorm.c:55
Structure holding the queue.
Definition: bufferqueue.h:49
double max_peak
Definition: af_speechnorm.c:47
void(* filter_channels[2])(AVFilterContext *ctx, AVFrame *in, int nb_samples)
Definition: af_speechnorm.c:85
struct FFBufQueue queue
Definition: af_speechnorm.c:81
void(* analyze_channel)(AVFilterContext *ctx, ChannelContext *cc, const uint8_t *srcp, int nb_samples)
Definition: af_speechnorm.c:83
ChannelContext * cc
Definition: af_speechnorm.c:74
#define av_freep(p)
AVFormatContext * ctx
Definition: movenc.c:48
static int64_t pts
int size
float min