103 #define OFFSET(x) offsetof(SilenceRemoveContext, x)
104 #define AF AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
133 new_sum -= *
s->window_current;
136 return new_sum /
s->window_size;
141 s->sum -= *
s->window_current;
143 s->sum += *
s->window_current;
146 if (
s->window_current >=
s->window_end)
147 s->window_current =
s->window;
155 new_sum -= *
s->window_current;
158 return sqrt(new_sum /
s->window_size);
163 s->sum -= *
s->window_current;
165 s->sum += *
s->window_current;
168 if (
s->window_current >=
s->window_end)
169 s->window_current =
s->window;
176 if (
s->stop_periods < 0) {
177 s->stop_periods = -
s->stop_periods;
181 switch (
s->detection) {
197 memset(
s->window, 0,
s->window_size *
sizeof(*
s->window));
199 s->window_current =
s->window;
200 s->window_end =
s->window +
s->window_size;
227 sizeof(*
s->start_holdoff) *
229 if (!
s->start_holdoff)
233 sizeof(*
s->start_silence_hold) *
235 if (!
s->start_silence_hold)
238 s->start_holdoff_offset = 0;
239 s->start_holdoff_end = 0;
240 s->start_found_periods = 0;
243 sizeof(*
s->stop_holdoff) *
245 if (!
s->stop_holdoff)
249 sizeof(*
s->stop_silence_hold) *
251 if (!
s->stop_silence_hold)
254 s->stop_holdoff_offset = 0;
255 s->stop_holdoff_end = 0;
256 s->stop_found_periods = 0;
258 if (
s->start_periods)
268 int *nb_samples_written,
int *ret,
int flush_silence)
272 if (*nb_samples_written) {
273 out->nb_samples = *nb_samples_written / outlink->
channels;
275 out->pts =
s->next_pts;
283 *nb_samples_written = 0;
288 if (
s->stop_silence_end <= 0 || !flush_silence)
297 if (
s->stop_silence_offset <
s->stop_silence_end) {
298 memcpy(silence->
data[0],
299 &
s->stop_silence_hold[
s->stop_silence_offset],
300 (
s->stop_silence_end -
s->stop_silence_offset) *
sizeof(
double));
303 if (
s->stop_silence_offset > 0) {
304 memcpy(silence->
data[0] + (
s->stop_silence_end -
s->stop_silence_offset) *
sizeof(
double),
305 &
s->stop_silence_hold[0],
306 s->stop_silence_offset *
sizeof(
double));
309 s->stop_silence_offset = 0;
310 s->stop_silence_end = 0;
312 silence->
pts =
s->next_pts;
325 int i, j, threshold, ret = 0;
326 int nbs, nb_samples_read, nb_samples_written;
327 double *obuf, *ibuf = (
double *)
in->data[0];
330 nb_samples_read = nb_samples_written = 0;
333 s->next_pts =
in->pts;
338 nbs =
in->nb_samples - nb_samples_read / outlink->
channels;
342 for (
i = 0;
i < nbs;
i++) {
343 if (
s->start_mode ==
T_ANY) {
345 for (j = 0; j < outlink->
channels; j++) {
346 threshold |=
s->compute(
s, ibuf[j]) >
s->start_threshold;
350 for (j = 0; j < outlink->
channels; j++) {
351 threshold &=
s->compute(
s, ibuf[j]) >
s->start_threshold;
356 for (j = 0; j < outlink->
channels; j++) {
358 s->start_holdoff[
s->start_holdoff_end++] = *ibuf++;
360 nb_samples_read += outlink->
channels;
362 if (
s->start_holdoff_end >=
s->start_duration * outlink->
channels) {
363 if (++
s->start_found_periods >=
s->start_periods) {
365 goto silence_trim_flush;
368 s->start_holdoff_offset = 0;
369 s->start_holdoff_end = 0;
370 s->start_silence_offset = 0;
371 s->start_silence_end = 0;
374 s->start_holdoff_end = 0;
376 for (j = 0; j < outlink->
channels; j++) {
377 s->update(
s, ibuf[j]);
378 if (
s->start_silence) {
379 s->start_silence_hold[
s->start_silence_offset++] = ibuf[j];
380 s->start_silence_end =
FFMIN(
s->start_silence_end + 1, outlink->
channels *
s->start_silence);
381 if (
s->start_silence_offset >= outlink->
channels *
s->start_silence) {
382 s->start_silence_offset = 0;
388 nb_samples_read += outlink->
channels;
395 nbs =
s->start_holdoff_end -
s->start_holdoff_offset;
406 if (
s->start_silence_end > 0) {
407 if (
s->start_silence_offset <
s->start_silence_end) {
409 &
s->start_silence_hold[
s->start_silence_offset],
410 (
s->start_silence_end -
s->start_silence_offset) *
sizeof(
double));
413 if (
s->start_silence_offset > 0) {
414 memcpy(
out->data[0] + (
s->start_silence_end -
s->start_silence_offset) *
sizeof(
double),
415 &
s->start_silence_hold[0],
416 s->start_silence_offset *
sizeof(
double));
420 memcpy(
out->data[0] +
s->start_silence_end *
sizeof(
double),
421 &
s->start_holdoff[
s->start_holdoff_offset],
422 nbs *
sizeof(
double));
424 out->pts =
s->next_pts;
429 s->start_holdoff_offset += nbs;
433 if (
s->start_holdoff_offset ==
s->start_holdoff_end) {
434 s->start_holdoff_offset = 0;
435 s->start_holdoff_end = 0;
436 s->start_silence_offset = 0;
437 s->start_silence_end = 0;
445 nbs =
in->nb_samples - nb_samples_read / outlink->
channels;
454 obuf = (
double *)
out->data[0];
456 if (
s->stop_periods) {
457 for (
i = 0;
i < nbs;
i++) {
458 if (
s->stop_mode ==
T_ANY) {
460 for (j = 0; j < outlink->
channels; j++) {
461 threshold |=
s->compute(
s, ibuf[j]) >
s->stop_threshold;
465 for (j = 0; j < outlink->
channels; j++) {
466 threshold &=
s->compute(
s, ibuf[j]) >
s->stop_threshold;
470 if (threshold &&
s->stop_holdoff_end && !
s->stop_silence) {
472 flush(
s,
out, outlink, &nb_samples_written, &ret, 0);
473 goto silence_copy_flush;
474 }
else if (threshold) {
475 for (j = 0; j < outlink->
channels; j++) {
479 nb_samples_read += outlink->
channels;
480 nb_samples_written += outlink->
channels;
481 }
else if (!threshold) {
482 for (j = 0; j < outlink->
channels; j++) {
484 if (
s->stop_silence) {
485 s->stop_silence_hold[
s->stop_silence_offset++] = *ibuf;
486 s->stop_silence_end =
FFMIN(
s->stop_silence_end + 1, outlink->
channels *
s->stop_silence);
487 if (
s->stop_silence_offset >= outlink->
channels *
s->stop_silence) {
488 s->stop_silence_offset = 0;
492 s->stop_holdoff[
s->stop_holdoff_end++] = *ibuf++;
494 nb_samples_read += outlink->
channels;
496 if (
s->stop_holdoff_end >=
s->stop_duration * outlink->
channels) {
497 if (++
s->stop_found_periods >=
s->stop_periods) {
498 s->stop_holdoff_offset = 0;
499 s->stop_holdoff_end = 0;
503 flush(
s,
out, outlink, &nb_samples_written, &ret, 1);
506 s->stop_found_periods = 0;
507 s->start_found_periods = 0;
508 s->start_holdoff_offset = 0;
509 s->start_holdoff_end = 0;
510 s->start_silence_offset = 0;
511 s->start_silence_end = 0;
514 flush(
s,
out, outlink, &nb_samples_written, &ret, 1);
519 flush(
s,
out, outlink, &nb_samples_written, &ret, 0);
520 goto silence_copy_flush;
524 flush(
s,
out, outlink, &nb_samples_written, &ret, 0);
526 memcpy(obuf, ibuf,
sizeof(
double) * nbs * outlink->
channels);
528 out->pts =
s->next_pts;
539 nbs =
s->stop_holdoff_end -
s->stop_holdoff_offset;
550 memcpy(
out->data[0], &
s->stop_holdoff[
s->stop_holdoff_offset],
551 nbs *
sizeof(
double));
552 s->stop_holdoff_offset += nbs;
554 out->pts =
s->next_pts;
561 if (
s->stop_holdoff_offset ==
s->stop_holdoff_end) {
562 s->stop_holdoff_offset = 0;
563 s->stop_holdoff_end = 0;
564 s->stop_silence_offset = 0;
565 s->stop_silence_end = 0;
589 int nbs =
s->stop_holdoff_end -
s->stop_holdoff_offset;
597 memcpy(
frame->
data[0], &
s->stop_holdoff[
s->stop_holdoff_offset],
598 nbs *
sizeof(
double));
672 .
name =
"silenceremove",
675 .priv_class = &silenceremove_class,
static enum AVSampleFormat sample_fmts[]
static const AVFilterPad inputs[]
static const AVFilterPad outputs[]
AVFILTER_DEFINE_CLASS(silenceremove)
static const AVFilterPad silenceremove_outputs[]
static void update_rms(SilenceRemoveContext *s, double sample)
AVFilter ff_af_silenceremove
static int query_formats(AVFilterContext *ctx)
static int config_input(AVFilterLink *inlink)
static int request_frame(AVFilterLink *outlink)
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
static double compute_peak(SilenceRemoveContext *s, double sample)
static double compute_rms(SilenceRemoveContext *s, double sample)
static void update_peak(SilenceRemoveContext *s, double sample)
static av_cold int init(AVFilterContext *ctx)
static av_cold void uninit(AVFilterContext *ctx)
static void clear_window(SilenceRemoveContext *s)
static void flush(SilenceRemoveContext *s, AVFrame *out, AVFilterLink *outlink, int *nb_samples_written, int *ret, int flush_silence)
static const AVFilterPad silenceremove_inputs[]
static const AVOption silenceremove_options[]
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! *ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
Main libavfilter public API header.
static __device__ float fabs(float a)
#define AVERROR_EOF
End of file.
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
int64_t av_rescale(int64_t a, int64_t b, int64_t c)
Rescale a 64-bit integer with rounding to nearest.
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
AVSampleFormat
Audio sample formats.
@ AV_SAMPLE_FMT_DBL
double
#define AV_NOPTS_VALUE
Undefined timestamp value.
#define AV_TIME_BASE
Internal time base represented as integer.
common internal API header
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
enum MovChannelLayoutTag * layouts
typedef void(RENAME(mix_any_func_type))
Describe the class of an AVClass context structure.
A list of supported channel layouts.
A link between two filters.
int channels
Number of channels.
AVFilterContext * src
source filter
AVRational time_base
Define the time base used by the PTS of the frames/samples which will pass through this link.
int sample_rate
samples per second
AVFilterContext * dst
dest filter
A filter pad used for either input or output.
const char * name
Pad name.
const char * name
Filter name.
This structure describes decoded (raw) audio or video data.
int nb_samples
number of audio samples (per channel) described by this frame
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Rational number (pair of numerator and denominator).
int64_t start_silence_opt
size_t stop_holdoff_offset
size_t stop_silence_offset
double * stop_silence_hold
int64_t start_duration_opt
int64_t stop_duration_opt
void(* update)(struct SilenceRemoveContext *s, double sample)
size_t start_holdoff_offset
double(* compute)(struct SilenceRemoveContext *s, double sample)
double * start_silence_hold
size_t start_silence_offset
#define av_malloc_array(a, b)
timestamp utils, mostly useful for debugging/logging purposes