AOMedia AV1 Codec
svc_encoder_rtc
1 /*
2  * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 // This is an example demonstrating how to implement a multi-layer AOM
13 // encoding scheme for RTC video applications.
14 
15 #include <assert.h>
16 #include <limits.h>
17 #include <math.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
22 #include <memory>
23 
24 #include "config/aom_config.h"
25 
26 #if CONFIG_AV1_DECODER
27 #include "aom/aom_decoder.h"
28 #endif
29 #include "aom/aom_encoder.h"
30 #include "aom/aomcx.h"
31 #include "common/args.h"
32 #include "common/tools_common.h"
33 #include "common/video_writer.h"
34 #include "examples/encoder_util.h"
35 #include "aom_ports/aom_timer.h"
36 #include "av1/ratectrl_rtc.h"
37 
38 #define OPTION_BUFFER_SIZE 1024
39 
40 typedef struct {
41  const char *output_filename;
42  char options[OPTION_BUFFER_SIZE];
43  struct AvxInputContext input_ctx;
44  int speed;
45  int aq_mode;
46  int layering_mode;
47  int output_obu;
48  int decode;
49  int tune_content;
50  int show_psnr;
51  bool use_external_rc;
52 } AppInput;
53 
// Kinds of per-layer option lists understood by
// parse_layer_options_from_string(). The values index option_min_values[]
// and option_max_values[].
typedef enum {
  QUANTIZER = 0,     // Single integer per layer.
  BITRATE = 1,       // Single integer per (spatial, temporal) layer.
  SCALE_FACTOR = 2,  // "num/den" pair per spatial layer.
  AUTO_ALT_REF = 3,  // Single 0/1 flag per layer.
  ALL_OPTION_TYPES   // Number of option types; not a valid type itself.
} LAYER_OPTION_TYPE;
61 
62 static const arg_def_t outputfile =
63  ARG_DEF("o", "output", 1, "Output filename");
64 static const arg_def_t frames_arg =
65  ARG_DEF("f", "frames", 1, "Number of frames to encode");
66 static const arg_def_t threads_arg =
67  ARG_DEF("th", "threads", 1, "Number of threads to use");
68 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
69 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
70 static const arg_def_t timebase_arg =
71  ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
72 static const arg_def_t bitrate_arg = ARG_DEF(
73  "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
74 static const arg_def_t spatial_layers_arg =
75  ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
76 static const arg_def_t temporal_layers_arg =
77  ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
78 static const arg_def_t layering_mode_arg =
79  ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
80 static const arg_def_t kf_dist_arg =
81  ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
82 static const arg_def_t scale_factors_arg =
83  ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
84 static const arg_def_t min_q_arg =
85  ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
86 static const arg_def_t max_q_arg =
87  ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
88 static const arg_def_t speed_arg =
89  ARG_DEF("sp", "speed", 1, "Speed configuration");
90 static const arg_def_t aqmode_arg =
91  ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
92 static const arg_def_t bitrates_arg =
93  ARG_DEF("bl", "bitrates", 1,
94  "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
95 static const arg_def_t dropframe_thresh_arg =
96  ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
97 static const arg_def_t error_resilient_arg =
98  ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
99 static const arg_def_t output_obu_arg =
100  ARG_DEF(NULL, "output-obu", 1,
101  "Write OBUs when set to 1. Otherwise write IVF files.");
102 static const arg_def_t test_decode_arg =
103  ARG_DEF(NULL, "test-decode", 1,
104  "Attempt to test decoding the output when set to 1. Default is 1.");
105 static const arg_def_t psnr_arg =
106  ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
107 static const arg_def_t ext_rc_arg =
108  ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
109 static const struct arg_enum_list tune_content_enum[] = {
110  { "default", AOM_CONTENT_DEFAULT },
111  { "screen", AOM_CONTENT_SCREEN },
112  { "film", AOM_CONTENT_FILM },
113  { NULL, 0 }
114 };
115 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
116  NULL, "tune-content", 1, "Tune content type", tune_content_enum);
117 
118 #if CONFIG_AV1_HIGHBITDEPTH
119 static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
120  { "10", AOM_BITS_10 },
121  { NULL, 0 } };
122 
123 static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
124  "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
125 #endif // CONFIG_AV1_HIGHBITDEPTH
126 
127 static const arg_def_t *svc_args[] = {
128  &frames_arg, &outputfile, &width_arg,
129  &height_arg, &timebase_arg, &bitrate_arg,
130  &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg,
131  &min_q_arg, &max_q_arg, &temporal_layers_arg,
132  &layering_mode_arg, &threads_arg, &aqmode_arg,
133 #if CONFIG_AV1_HIGHBITDEPTH
134  &bitdepth_arg,
135 #endif
136  &speed_arg, &bitrates_arg, &dropframe_thresh_arg,
137  &error_resilient_arg, &output_obu_arg, &test_decode_arg,
138  &tune_content_arg, &psnr_arg, NULL,
139 };
140 
141 #define zero(Dest) memset(&(Dest), 0, sizeof(Dest))
142 
143 static const char *exec_name;
144 
145 void usage_exit(void) {
146  fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n",
147  exec_name);
148  fprintf(stderr, "Options:\n");
149  arg_show_usage(stderr, svc_args);
150  exit(EXIT_FAILURE);
151 }
152 
// Returns 1 when the first four bytes at `detect` are the Y4M signature
// "YUV4", and 0 otherwise. Exactly four bytes are read.
static int file_is_y4m(const char detect[4]) {
  static const char kY4mSignature[4] = { 'Y', 'U', 'V', '4' };
  if (memcmp(detect, kY4mSignature, sizeof(kY4mSignature)) != 0) {
    return 0;
  }
  return 1;
}
156 
// Returns 1 when the first four bytes at `detect` are the IVF signature
// "DKIF", and 0 otherwise. Exactly four bytes are read.
static int fourcc_is_ivf(const char detect[4]) {
  const int matches = (memcmp(detect, "DKIF", 4) == 0);
  return matches ? 1 : 0;
}
163 
164 static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
165  1 };
166 
167 static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
168 
169 static void open_input_file(struct AvxInputContext *input,
171  /* Parse certain options from the input file, if possible */
172  input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
173  : set_binary_mode(stdin);
174 
175  if (!input->file) fatal("Failed to open input file");
176 
177  if (!fseeko(input->file, 0, SEEK_END)) {
178  /* Input file is seekable. Figure out how long it is, so we can get
179  * progress info.
180  */
181  input->length = ftello(input->file);
182  rewind(input->file);
183  }
184 
185  /* Default to 1:1 pixel aspect ratio. */
186  input->pixel_aspect_ratio.numerator = 1;
187  input->pixel_aspect_ratio.denominator = 1;
188 
189  /* For RAW input sources, these bytes will applied on the first frame
190  * in read_frame().
191  */
192  input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
193  input->detect.position = 0;
194 
195  if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
196  if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
197  input->only_i420) >= 0) {
198  input->file_type = FILE_TYPE_Y4M;
199  input->width = input->y4m.pic_w;
200  input->height = input->y4m.pic_h;
201  input->pixel_aspect_ratio.numerator = input->y4m.par_n;
202  input->pixel_aspect_ratio.denominator = input->y4m.par_d;
203  input->framerate.numerator = input->y4m.fps_n;
204  input->framerate.denominator = input->y4m.fps_d;
205  input->fmt = input->y4m.aom_fmt;
206  input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
207  } else {
208  fatal("Unsupported Y4M stream.");
209  }
210  } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
211  fatal("IVF is not supported as input.");
212  } else {
213  input->file_type = FILE_TYPE_RAW;
214  }
215 }
216 
217 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
218  int *value0, int *value1) {
219  if (type == SCALE_FACTOR) {
220  *value0 = (int)strtol(input, &input, 10);
221  if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
222  *value1 = (int)strtol(input, &input, 10);
223 
224  if (*value0 < option_min_values[SCALE_FACTOR] ||
225  *value1 < option_min_values[SCALE_FACTOR] ||
226  *value0 > option_max_values[SCALE_FACTOR] ||
227  *value1 > option_max_values[SCALE_FACTOR] ||
228  *value0 > *value1) // num shouldn't be greater than den
230  } else {
231  *value0 = atoi(input);
232  if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
234  }
235  return AOM_CODEC_OK;
236 }
237 
238 static aom_codec_err_t parse_layer_options_from_string(
239  aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
240  int *option0, int *option1) {
242  char *input_string;
243  char *token;
244  const char *delim = ",";
245  int num_layers = svc_params->number_spatial_layers;
246  int i = 0;
247 
248  if (type == BITRATE)
249  num_layers =
250  svc_params->number_spatial_layers * svc_params->number_temporal_layers;
251 
252  if (input == NULL || option0 == NULL ||
253  (option1 == NULL && type == SCALE_FACTOR))
255 
256  const size_t input_length = strlen(input);
257  input_string = reinterpret_cast<char *>(malloc(input_length + 1));
258  if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
259  memcpy(input_string, input, input_length + 1);
260  token = strtok(input_string, delim); // NOLINT
261  for (i = 0; i < num_layers; ++i) {
262  if (token != NULL) {
263  res = extract_option(type, token, option0 + i, option1 + i);
264  if (res != AOM_CODEC_OK) break;
265  token = strtok(NULL, delim); // NOLINT
266  } else {
268  break;
269  }
270  }
271  free(input_string);
272  return res;
273 }
274 
275 static void parse_command_line(int argc, const char **argv_,
276  AppInput *app_input,
277  aom_svc_params_t *svc_params,
278  aom_codec_enc_cfg_t *enc_cfg) {
279  struct arg arg;
280  char **argv = NULL;
281  char **argi = NULL;
282  char **argj = NULL;
283  char string_options[1024] = { 0 };
284 
285  // Default settings
286  svc_params->number_spatial_layers = 1;
287  svc_params->number_temporal_layers = 1;
288  app_input->layering_mode = 0;
289  app_input->output_obu = 0;
290  app_input->decode = 1;
291  enc_cfg->g_threads = 1;
292  enc_cfg->rc_end_usage = AOM_CBR;
293 
294  // process command line options
295  argv = argv_dup(argc - 1, argv_ + 1);
296  if (!argv) {
297  fprintf(stderr, "Error allocating argument list\n");
298  exit(EXIT_FAILURE);
299  }
300  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
301  arg.argv_step = 1;
302 
303  if (arg_match(&arg, &outputfile, argi)) {
304  app_input->output_filename = arg.val;
305  } else if (arg_match(&arg, &width_arg, argi)) {
306  enc_cfg->g_w = arg_parse_uint(&arg);
307  } else if (arg_match(&arg, &height_arg, argi)) {
308  enc_cfg->g_h = arg_parse_uint(&arg);
309  } else if (arg_match(&arg, &timebase_arg, argi)) {
310  enc_cfg->g_timebase = arg_parse_rational(&arg);
311  } else if (arg_match(&arg, &bitrate_arg, argi)) {
312  enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
313  } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
314  svc_params->number_spatial_layers = arg_parse_uint(&arg);
315  } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
316  svc_params->number_temporal_layers = arg_parse_uint(&arg);
317  } else if (arg_match(&arg, &speed_arg, argi)) {
318  app_input->speed = arg_parse_uint(&arg);
319  if (app_input->speed > 11) {
320  aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
321  }
322  } else if (arg_match(&arg, &aqmode_arg, argi)) {
323  app_input->aq_mode = arg_parse_uint(&arg);
324  } else if (arg_match(&arg, &threads_arg, argi)) {
325  enc_cfg->g_threads = arg_parse_uint(&arg);
326  } else if (arg_match(&arg, &layering_mode_arg, argi)) {
327  app_input->layering_mode = arg_parse_int(&arg);
328  } else if (arg_match(&arg, &kf_dist_arg, argi)) {
329  enc_cfg->kf_min_dist = arg_parse_uint(&arg);
330  enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
331  } else if (arg_match(&arg, &scale_factors_arg, argi)) {
332  aom_codec_err_t res = parse_layer_options_from_string(
333  svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
334  svc_params->scaling_factor_den);
335  if (res != AOM_CODEC_OK) {
336  die("Failed to parse scale factors: %s\n",
338  }
339  } else if (arg_match(&arg, &min_q_arg, argi)) {
340  enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
341  } else if (arg_match(&arg, &max_q_arg, argi)) {
342  enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
343 #if CONFIG_AV1_HIGHBITDEPTH
344  } else if (arg_match(&arg, &bitdepth_arg, argi)) {
345  enc_cfg->g_bit_depth =
346  static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
347  switch (enc_cfg->g_bit_depth) {
348  case AOM_BITS_8:
349  enc_cfg->g_input_bit_depth = 8;
350  enc_cfg->g_profile = 0;
351  break;
352  case AOM_BITS_10:
353  enc_cfg->g_input_bit_depth = 10;
354  enc_cfg->g_profile = 0;
355  break;
356  default:
357  die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
358  }
359 #endif // CONFIG_VP9_HIGHBITDEPTH
360  } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
361  enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
362  } else if (arg_match(&arg, &error_resilient_arg, argi)) {
363  enc_cfg->g_error_resilient = arg_parse_uint(&arg);
364  if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
365  die("Invalid value for error resilient (0, 1): %d.",
366  enc_cfg->g_error_resilient);
367  } else if (arg_match(&arg, &output_obu_arg, argi)) {
368  app_input->output_obu = arg_parse_uint(&arg);
369  if (app_input->output_obu != 0 && app_input->output_obu != 1)
370  die("Invalid value for obu output flag (0, 1): %d.",
371  app_input->output_obu);
372  } else if (arg_match(&arg, &test_decode_arg, argi)) {
373  app_input->decode = arg_parse_uint(&arg);
374  if (app_input->decode != 0 && app_input->decode != 1)
375  die("Invalid value for test decode flag (0, 1): %d.",
376  app_input->decode);
377  } else if (arg_match(&arg, &tune_content_arg, argi)) {
378  app_input->tune_content = arg_parse_enum_or_int(&arg);
379  printf("tune content %d\n", app_input->tune_content);
380  } else if (arg_match(&arg, &psnr_arg, argi)) {
381  app_input->show_psnr = 1;
382  } else if (arg_match(&arg, &ext_rc_arg, argi)) {
383  app_input->use_external_rc = true;
384  } else {
385  ++argj;
386  }
387  }
388 
389  // Total bitrate needs to be parsed after the number of layers.
390  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
391  arg.argv_step = 1;
392  if (arg_match(&arg, &bitrates_arg, argi)) {
393  aom_codec_err_t res = parse_layer_options_from_string(
394  svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
395  if (res != AOM_CODEC_OK) {
396  die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
397  }
398  } else {
399  ++argj;
400  }
401  }
402 
403  // There will be a space in front of the string options
404  if (strlen(string_options) > 0)
405  strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
406 
407  // Check for unrecognized options
408  for (argi = argv; *argi; ++argi)
409  if (argi[0][0] == '-' && strlen(argi[0]) > 1)
410  die("Error: Unrecognized option %s\n", *argi);
411 
412  if (argv[0] == NULL) {
413  usage_exit();
414  }
415 
416  app_input->input_ctx.filename = argv[0];
417  free(argv);
418 
419  open_input_file(&app_input->input_ctx, AOM_CSP_UNKNOWN);
420  if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
421  enc_cfg->g_w = app_input->input_ctx.width;
422  enc_cfg->g_h = app_input->input_ctx.height;
423  }
424 
425  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
426  enc_cfg->g_h % 2)
427  die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
428 
429  printf(
430  "Codec %s\n"
431  "layers: %d\n"
432  "width %u, height: %u\n"
433  "num: %d, den: %d, bitrate: %u\n"
434  "gop size: %u\n",
436  svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
437  enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
438  enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
439 }
440 
// Number of temporal layers for each layering mode (index = mode, 0..11).
static int mode_to_num_temporal_layers[12] = {
  1,  // mode 0
  2,  // mode 1
  3,  // mode 2
  3,  // mode 3
  2,  // mode 4
  1,  // mode 5
  1,  // mode 6
  3,  // mode 7
  3,  // mode 8
  3,  // mode 9
  3,  // mode 10
  3,  // mode 11
};

// Number of spatial layers for each layering mode (index = mode, 0..11).
static int mode_to_num_spatial_layers[12] = {
  1,  // mode 0
  1,  // mode 1
  1,  // mode 2
  1,  // mode 3
  1,  // mode 4
  2,  // mode 5
  3,  // mode 6
  2,  // mode 7
  3,  // mode 8
  3,  // mode 9
  3,  // mode 10
  3,  // mode 11
};
447 
448 // For rate control encoding stats.
449 struct RateControlMetrics {
450  // Number of input frames per layer.
451  int layer_input_frames[AOM_MAX_TS_LAYERS];
452  // Number of encoded non-key frames per layer.
453  int layer_enc_frames[AOM_MAX_TS_LAYERS];
454  // Framerate per layer layer (cumulative).
455  double layer_framerate[AOM_MAX_TS_LAYERS];
456  // Target average frame size per layer (per-frame-bandwidth per layer).
457  double layer_pfb[AOM_MAX_LAYERS];
458  // Actual average frame size per layer.
459  double layer_avg_frame_size[AOM_MAX_LAYERS];
460  // Average rate mismatch per layer (|target - actual| / target).
461  double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
462  // Actual encoding bitrate per layer (cumulative across temporal layers).
463  double layer_encoding_bitrate[AOM_MAX_LAYERS];
464  // Average of the short-time encoder actual bitrate.
465  // TODO(marpan): Should we add these short-time stats for each layer?
466  double avg_st_encoding_bitrate;
467  // Variance of the short-time encoder actual bitrate.
468  double variance_st_encoding_bitrate;
469  // Window (number of frames) for computing short-timee encoding bitrate.
470  int window_size;
471  // Number of window measurements.
472  int window_count;
473  int layer_target_bitrate[AOM_MAX_LAYERS];
474 };
475 
// Number of reference buffer slots; bounds the refresh[] loop in
// set_layer_pattern().
static const int REF_FRAMES = 8;

// Number of inter references per frame; bounds the ref_idx[] and
// reference[] loops in set_layer_pattern().
static const int INTER_REFS_PER_FRAME = 7;

// Reference frames used in this example encoder. The values are used as
// indices into the ref_idx[] and reference[] arrays of the reference frame
// config.
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME = 1,
  SVC_LAST3_FRAME = 2,
  SVC_GOLDEN_FRAME = 3,
  SVC_BWDREF_FRAME = 4,
  SVC_ALTREF2_FRAME = 5,
  SVC_ALTREF_FRAME = 6
};
490 
491 static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
492  FILE *f = input_ctx->file;
493  y4m_input *y4m = &input_ctx->y4m;
494  int shortread = 0;
495 
496  if (input_ctx->file_type == FILE_TYPE_Y4M) {
497  if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
498  } else {
499  shortread = read_yuv_frame(input_ctx, img);
500  }
501 
502  return !shortread;
503 }
504 
505 static void close_input_file(struct AvxInputContext *input) {
506  fclose(input->file);
507  if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
508 }
509 
510 // Note: these rate control metrics assume only 1 key frame in the
511 // sequence (i.e., first frame only). So for temporal pattern# 7
512 // (which has key frame for every frame on base layer), the metrics
513 // computation will be off/wrong.
514 // TODO(marpan): Update these metrics to account for multiple key frames
515 // in the stream.
516 static void set_rate_control_metrics(struct RateControlMetrics *rc,
517  double framerate, int ss_number_layers,
518  int ts_number_layers) {
519  int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
520  ts_rate_decimator[0] = 1;
521  if (ts_number_layers == 2) {
522  ts_rate_decimator[0] = 2;
523  ts_rate_decimator[1] = 1;
524  }
525  if (ts_number_layers == 3) {
526  ts_rate_decimator[0] = 4;
527  ts_rate_decimator[1] = 2;
528  ts_rate_decimator[2] = 1;
529  }
530  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
531  // per-frame-bandwidth, for the rate control encoding stats below.
532  for (int sl = 0; sl < ss_number_layers; ++sl) {
533  int i = sl * ts_number_layers;
534  rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
535  rc->layer_pfb[i] =
536  1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
537  for (int tl = 0; tl < ts_number_layers; ++tl) {
538  i = sl * ts_number_layers + tl;
539  if (tl > 0) {
540  rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
541  rc->layer_pfb[i] =
542  1000.0 *
543  (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
544  (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
545  }
546  rc->layer_input_frames[tl] = 0;
547  rc->layer_enc_frames[tl] = 0;
548  rc->layer_encoding_bitrate[i] = 0.0;
549  rc->layer_avg_frame_size[i] = 0.0;
550  rc->layer_avg_rate_mismatch[i] = 0.0;
551  }
552  }
553  rc->window_count = 0;
554  rc->window_size = 15;
555  rc->avg_st_encoding_bitrate = 0.0;
556  rc->variance_st_encoding_bitrate = 0.0;
557 }
558 
559 static void printout_rate_control_summary(struct RateControlMetrics *rc,
560  int frame_cnt, int ss_number_layers,
561  int ts_number_layers) {
562  int tot_num_frames = 0;
563  double perc_fluctuation = 0.0;
564  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
565  printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
566  for (int sl = 0; sl < ss_number_layers; ++sl) {
567  tot_num_frames = 0;
568  for (int tl = 0; tl < ts_number_layers; ++tl) {
569  int i = sl * ts_number_layers + tl;
570  const int num_dropped =
571  tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
572  : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
573  tot_num_frames += rc->layer_input_frames[tl];
574  rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
575  rc->layer_encoding_bitrate[i] /
576  tot_num_frames;
577  rc->layer_avg_frame_size[i] =
578  rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
579  rc->layer_avg_rate_mismatch[i] =
580  100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
581  printf("For layer#: %d %d \n", sl, tl);
582  printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
583  rc->layer_encoding_bitrate[i]);
584  printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
585  rc->layer_avg_frame_size[i]);
586  printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
587  printf(
588  "Number of input frames, encoded (non-key) frames, "
589  "and perc dropped frames: %d %d %f\n",
590  rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
591  100.0 * num_dropped / rc->layer_input_frames[tl]);
592  printf("\n");
593  }
594  }
595  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
596  rc->variance_st_encoding_bitrate =
597  rc->variance_st_encoding_bitrate / rc->window_count -
598  (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
599  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
600  rc->avg_st_encoding_bitrate;
601  printf("Short-time stats, for window of %d frames:\n", rc->window_size);
602  printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
603  rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
604  perc_fluctuation);
605  if (frame_cnt - 1 != tot_num_frames)
606  die("Error: Number of input frames not equal to output!\n");
607 }
608 
609 // Layer pattern configuration.
610 static void set_layer_pattern(
611  int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
612  aom_svc_ref_frame_config_t *ref_frame_config,
613  aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
614  int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
615  // Setting this flag to 1 enables a simple example of
616  // RPS (Reference Picture Selection) for 1 layer.
617  int use_rps_example = 0;
618  int i;
619  int enable_longterm_temporal_ref = 1;
620  int shift = (layering_mode == 8) ? 2 : 0;
621  int simulcast_mode = (layering_mode == 11);
622  *use_svc_control = 1;
623  layer_id->spatial_layer_id = spatial_layer_id;
624  int lag_index = 0;
625  int base_count = superframe_cnt >> 2;
626  ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST
627  ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST
628  ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST
629  // Set the reference map buffer idx for the 7 references:
630  // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
631  // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
632  for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
633  for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
634  for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
635 
636  if (ksvc_mode) {
637  // Same pattern as case 9, but the reference structure will be constrained
638  // below.
639  layering_mode = 9;
640  }
641  switch (layering_mode) {
642  case 0:
643  if (use_rps_example == 0) {
644  // 1-layer: update LAST on every frame, reference LAST.
645  layer_id->temporal_layer_id = 0;
646  layer_id->spatial_layer_id = 0;
647  ref_frame_config->refresh[0] = 1;
648  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
649  } else {
650  // Pattern of 2 references (ALTREF and GOLDEN) trailing
651  // LAST by 4 and 8 frames, with some switching logic to
652  // sometimes only predict from the longer-term reference
653  //(golden here). This is simple example to test RPS
654  // (reference picture selection).
655  int last_idx = 0;
656  int last_idx_refresh = 0;
657  int gld_idx = 0;
658  int alt_ref_idx = 0;
659  int lag_alt = 4;
660  int lag_gld = 8;
661  layer_id->temporal_layer_id = 0;
662  layer_id->spatial_layer_id = 0;
663  int sh = 8; // slots 0 - 7.
664  // Moving index slot for last: 0 - (sh - 1)
665  if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
666  // Moving index for refresh of last: one ahead for next frame.
667  last_idx_refresh = superframe_cnt % sh;
668  // Moving index for gld_ref, lag behind current by lag_gld
669  if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
670  // Moving index for alt_ref, lag behind LAST by lag_alt frames.
671  if (superframe_cnt > lag_alt)
672  alt_ref_idx = (superframe_cnt - lag_alt) % sh;
673  // Set the ref_idx.
674  // Default all references to slot for last.
675  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
676  ref_frame_config->ref_idx[i] = last_idx;
677  // Set the ref_idx for the relevant references.
678  ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
679  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
680  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
681  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
682  // Refresh this slot, which will become LAST on next frame.
683  ref_frame_config->refresh[last_idx_refresh] = 1;
684  // Reference LAST, ALTREF, and GOLDEN
685  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
686  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
687  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
688  // Switch to only GOLDEN every 200 frames.
689  if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
690  ref_frame_config->reference[SVC_LAST_FRAME] = 0;
691  ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
692  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
693  // Test with the long-term reference as LAST instead. This is just a
694  // renaming, but it tests whether the encoder behaves the same whether
695  // the long-term reference is LAST or GOLDEN.
696  if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
697  ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
698  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
699  ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
700  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
701  }
702  }
703  }
704  break;
705  case 1:
706  // 2-temporal layer.
707  // 1 3 5
708  // 0 2 4
709  // Keep golden fixed at slot 3.
710  base_count = superframe_cnt >> 1;
711  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
712  // Cyclically refresh slots 5, 6, 7, for lag alt ref.
713  lag_index = 5;
714  if (base_count > 0) {
715  lag_index = 5 + (base_count % 3);
716  if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
717  }
718  // Set the altref slot to lag_index.
719  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
720  if (superframe_cnt % 2 == 0) {
721  layer_id->temporal_layer_id = 0;
722  // Update LAST on layer 0, reference LAST.
723  ref_frame_config->refresh[0] = 1;
724  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
725  // Refresh lag_index slot, needed for lagging altref.
726  ref_frame_config->refresh[lag_index] = 1;
727  // Refresh GOLDEN every x base layer frames.
728  if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
729  } else {
730  layer_id->temporal_layer_id = 1;
731  // No updates on layer 1, reference LAST (TL0).
732  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
733  }
734  // Always reference golden and altref on TL0.
735  if (layer_id->temporal_layer_id == 0) {
736  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
737  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
738  }
739  break;
740  case 2:
741  // 3-temporal layer:
742  // 1 3 5 7
743  // 2 6
744  // 0 4 8
745  if (superframe_cnt % 4 == 0) {
746  // Base layer.
747  layer_id->temporal_layer_id = 0;
748  // Update LAST on layer 0, reference LAST.
749  ref_frame_config->refresh[0] = 1;
750  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
751  } else if ((superframe_cnt - 1) % 4 == 0) {
752  layer_id->temporal_layer_id = 2;
753  // First top layer: no updates, only reference LAST (TL0).
754  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
755  } else if ((superframe_cnt - 2) % 4 == 0) {
756  layer_id->temporal_layer_id = 1;
757  // Middle layer (TL1): update LAST2, only reference LAST (TL0).
758  ref_frame_config->refresh[1] = 1;
759  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
760  } else if ((superframe_cnt - 3) % 4 == 0) {
761  layer_id->temporal_layer_id = 2;
762  // Second top layer: no updates, only reference LAST.
763  // Set buffer idx for LAST to slot 1, since that was the slot
764  // updated in previous frame. So LAST is TL1 frame.
765  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
766  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
767  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
768  }
769  break;
770  case 3:
771  // 3 TL, same as above, except allow for predicting
772  // off 2 more references (GOLDEN and ALTREF), with
773  // GOLDEN updated periodically, and ALTREF lagging from
774  // LAST from ~4 frames. Both GOLDEN and ALTREF
775  // can only be updated on base temporal layer.
776 
777  // Keep golden fixed at slot 3.
778  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
779  // Cyclically refresh slots 5, 6, 7, for lag altref.
780  lag_index = 5;
781  if (base_count > 0) {
782  lag_index = 5 + (base_count % 3);
783  if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
784  }
785  // Set the altref slot to lag_index.
786  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
787  if (superframe_cnt % 4 == 0) {
788  // Base layer.
789  layer_id->temporal_layer_id = 0;
790  // Update LAST on layer 0, reference LAST.
791  ref_frame_config->refresh[0] = 1;
792  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
793  // Refresh GOLDEN every x ~10 base layer frames.
794  if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
795  // Refresh lag_index slot, needed for lagging altref.
796  ref_frame_config->refresh[lag_index] = 1;
797  } else if ((superframe_cnt - 1) % 4 == 0) {
798  layer_id->temporal_layer_id = 2;
799  // First top layer: no updates, only reference LAST (TL0).
800  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
801  } else if ((superframe_cnt - 2) % 4 == 0) {
802  layer_id->temporal_layer_id = 1;
803  // Middle layer (TL1): update LAST2, only reference LAST (TL0).
804  ref_frame_config->refresh[1] = 1;
805  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
806  } else if ((superframe_cnt - 3) % 4 == 0) {
807  layer_id->temporal_layer_id = 2;
808  // Second top layer: no updates, only reference LAST.
809  // Set buffer idx for LAST to slot 1, since that was the slot
810  // updated in previous frame. So LAST is TL1 frame.
811  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
812  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
813  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
814  }
815  // Every frame can reference GOLDEN AND ALTREF.
816  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
817  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
818  // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
819  if (speed >= 7) {
820  ref_frame_comp_pred->use_comp_pred[2] = 1;
821  ref_frame_comp_pred->use_comp_pred[0] = 1;
822  }
823  break;
824  case 4:
825  // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
826  // only reference GF (not LAST). Other frames only reference LAST.
827  // 1 3 5 7
828  // 2 6
829  // 0 4 8
830  if (superframe_cnt % 4 == 0) {
831  // Base layer.
832  layer_id->temporal_layer_id = 0;
833  // Update LAST on layer 0, only reference LAST.
834  ref_frame_config->refresh[0] = 1;
835  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
836  } else if ((superframe_cnt - 1) % 4 == 0) {
837  layer_id->temporal_layer_id = 2;
838  // First top layer: no updates, only reference LAST (TL0).
839  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
840  } else if ((superframe_cnt - 2) % 4 == 0) {
841  layer_id->temporal_layer_id = 1;
842  // Middle layer (TL1): update GF, only reference LAST (TL0).
843  ref_frame_config->refresh[3] = 1;
844  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
845  } else if ((superframe_cnt - 3) % 4 == 0) {
846  layer_id->temporal_layer_id = 2;
847  // Second top layer: no updates, only reference GF.
848  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
849  }
850  break;
851  case 5:
852  // 2 spatial layers, 1 temporal.
853  layer_id->temporal_layer_id = 0;
854  if (layer_id->spatial_layer_id == 0) {
855  // Reference LAST, update LAST.
856  ref_frame_config->refresh[0] = 1;
857  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
858  } else if (layer_id->spatial_layer_id == 1) {
859  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
860  // and GOLDEN to slot 0. Update slot 1 (LAST).
861  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
862  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
863  ref_frame_config->refresh[1] = 1;
864  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
865  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
866  }
867  break;
868  case 6:
869  // 3 spatial layers, 1 temporal.
870  // Note for this case, we set the buffer idx for all references to be
871  // either LAST or GOLDEN, which are always valid references, since decoder
872  // will check if any of the 7 references is valid scale in
873  // valid_ref_frame_size().
874  layer_id->temporal_layer_id = 0;
875  if (layer_id->spatial_layer_id == 0) {
876  // Reference LAST, update LAST. Set all buffer_idx to 0.
877  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
878  ref_frame_config->ref_idx[i] = 0;
879  ref_frame_config->refresh[0] = 1;
880  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
881  } else if (layer_id->spatial_layer_id == 1) {
882  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
883  // and GOLDEN (and all other refs) to slot 0.
884  // Update slot 1 (LAST).
885  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
886  ref_frame_config->ref_idx[i] = 0;
887  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
888  ref_frame_config->refresh[1] = 1;
889  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
890  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
891  } else if (layer_id->spatial_layer_id == 2) {
892  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
893  // and GOLDEN (and all other refs) to slot 1.
894  // Update slot 2 (LAST).
895  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
896  ref_frame_config->ref_idx[i] = 1;
897  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
898  ref_frame_config->refresh[2] = 1;
899  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
900  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
901  // For 3 spatial layer case: allow for top spatial layer to use
902  // additional temporal reference. Update every 10 frames.
903  if (enable_longterm_temporal_ref) {
904  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
905  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
906  if (base_count % 10 == 0)
907  ref_frame_config->refresh[REF_FRAMES - 1] = 1;
908  }
909  }
910  break;
911  case 7:
912  // 2 spatial and 3 temporal layer.
913  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
914  if (superframe_cnt % 4 == 0) {
915  // Base temporal layer
916  layer_id->temporal_layer_id = 0;
917  if (layer_id->spatial_layer_id == 0) {
918  // Reference LAST, update LAST
919  // Set all buffer_idx to 0
920  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
921  ref_frame_config->ref_idx[i] = 0;
922  ref_frame_config->refresh[0] = 1;
923  } else if (layer_id->spatial_layer_id == 1) {
924  // Reference LAST and GOLDEN.
925  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
926  ref_frame_config->ref_idx[i] = 0;
927  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
928  ref_frame_config->refresh[1] = 1;
929  }
930  } else if ((superframe_cnt - 1) % 4 == 0) {
931  // First top temporal enhancement layer.
932  layer_id->temporal_layer_id = 2;
933  if (layer_id->spatial_layer_id == 0) {
934  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
935  ref_frame_config->ref_idx[i] = 0;
936  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
937  ref_frame_config->refresh[3] = 1;
938  } else if (layer_id->spatial_layer_id == 1) {
939  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
940  // GOLDEN (and all other refs) to slot 3.
941  // No update.
942  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
943  ref_frame_config->ref_idx[i] = 3;
944  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
945  }
946  } else if ((superframe_cnt - 2) % 4 == 0) {
947  // Middle temporal enhancement layer.
948  layer_id->temporal_layer_id = 1;
949  if (layer_id->spatial_layer_id == 0) {
950  // Reference LAST.
951  // Set all buffer_idx to 0.
952  // Set GOLDEN to slot 5 and update slot 5.
953  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
954  ref_frame_config->ref_idx[i] = 0;
955  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
956  ref_frame_config->refresh[5 - shift] = 1;
957  } else if (layer_id->spatial_layer_id == 1) {
958  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
959  // GOLDEN (and all other refs) to slot 5.
960  // Set LAST3 to slot 6 and update slot 6.
961  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
962  ref_frame_config->ref_idx[i] = 5 - shift;
963  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
964  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
965  ref_frame_config->refresh[6 - shift] = 1;
966  }
967  } else if ((superframe_cnt - 3) % 4 == 0) {
968  // Second top temporal enhancement layer.
969  layer_id->temporal_layer_id = 2;
970  if (layer_id->spatial_layer_id == 0) {
971  // Set LAST to slot 5 and reference LAST.
972  // Set GOLDEN to slot 3 and update slot 3.
973  // Set all other buffer_idx to 0.
974  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
975  ref_frame_config->ref_idx[i] = 0;
976  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
977  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
978  ref_frame_config->refresh[3] = 1;
979  } else if (layer_id->spatial_layer_id == 1) {
980  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
981  // GOLDEN to slot 3. No update.
982  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
983  ref_frame_config->ref_idx[i] = 0;
984  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
985  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
986  }
987  }
988  break;
989  case 8:
990  // 3 spatial and 3 temporal layer.
991  // Same as case 9 but overlap in the buffer slot updates.
992  // (shift = 2). The slots 3 and 4 updated by first TL2 are
993  // reused for update in TL1 superframe.
994  // Note for this case, frame order hint must be disabled for
995  // lower resolutions (operating points > 0) to be decodable.
996  case 9:
997  // 3 spatial and 3 temporal layer.
998  // No overlap in buffer updates between TL2 and TL1.
999  // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
1000  // Set the references via the svc_ref_frame_config control.
1001  // Always reference LAST.
1002  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1003  if (superframe_cnt % 4 == 0) {
1004  // Base temporal layer.
1005  layer_id->temporal_layer_id = 0;
1006  if (layer_id->spatial_layer_id == 0) {
1007  // Reference LAST, update LAST.
1008  // Set all buffer_idx to 0.
1009  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1010  ref_frame_config->ref_idx[i] = 0;
1011  ref_frame_config->refresh[0] = 1;
1012  } else if (layer_id->spatial_layer_id == 1) {
1013  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1014  // GOLDEN (and all other refs) to slot 0.
1015  // Update slot 1 (LAST).
1016  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1017  ref_frame_config->ref_idx[i] = 0;
1018  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1019  ref_frame_config->refresh[1] = 1;
1020  } else if (layer_id->spatial_layer_id == 2) {
1021  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1022  // GOLDEN (and all other refs) to slot 1.
1023  // Update slot 2 (LAST).
1024  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1025  ref_frame_config->ref_idx[i] = 1;
1026  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1027  ref_frame_config->refresh[2] = 1;
1028  }
1029  } else if ((superframe_cnt - 1) % 4 == 0) {
1030  // First top temporal enhancement layer.
1031  layer_id->temporal_layer_id = 2;
1032  if (layer_id->spatial_layer_id == 0) {
1033  // Reference LAST (slot 0).
1034  // Set GOLDEN to slot 3 and update slot 3.
1035  // Set all other buffer_idx to slot 0.
1036  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1037  ref_frame_config->ref_idx[i] = 0;
1038  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1039  ref_frame_config->refresh[3] = 1;
1040  } else if (layer_id->spatial_layer_id == 1) {
1041  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1042  // GOLDEN (and all other refs) to slot 3.
1043  // Set LAST2 to slot 4 and Update slot 4.
1044  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1045  ref_frame_config->ref_idx[i] = 3;
1046  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1047  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1048  ref_frame_config->refresh[4] = 1;
1049  } else if (layer_id->spatial_layer_id == 2) {
1050  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1051  // GOLDEN (and all other refs) to slot 4.
1052  // No update.
1053  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1054  ref_frame_config->ref_idx[i] = 4;
1055  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1056  }
1057  } else if ((superframe_cnt - 2) % 4 == 0) {
1058  // Middle temporal enhancement layer.
1059  layer_id->temporal_layer_id = 1;
1060  if (layer_id->spatial_layer_id == 0) {
1061  // Reference LAST.
1062  // Set all buffer_idx to 0.
1063  // Set GOLDEN to slot 5 and update slot 5.
1064  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1065  ref_frame_config->ref_idx[i] = 0;
1066  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1067  ref_frame_config->refresh[5 - shift] = 1;
1068  } else if (layer_id->spatial_layer_id == 1) {
1069  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1070  // GOLDEN (and all other refs) to slot 5.
1071  // Set LAST3 to slot 6 and update slot 6.
1072  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1073  ref_frame_config->ref_idx[i] = 5 - shift;
1074  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1075  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1076  ref_frame_config->refresh[6 - shift] = 1;
1077  } else if (layer_id->spatial_layer_id == 2) {
1078  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1079  // GOLDEN (and all other refs) to slot 6.
1080  // Set LAST3 to slot 7 and update slot 7.
1081  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1082  ref_frame_config->ref_idx[i] = 6 - shift;
1083  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1084  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1085  ref_frame_config->refresh[7 - shift] = 1;
1086  }
1087  } else if ((superframe_cnt - 3) % 4 == 0) {
1088  // Second top temporal enhancement layer.
1089  layer_id->temporal_layer_id = 2;
1090  if (layer_id->spatial_layer_id == 0) {
1091  // Set LAST to slot 5 and reference LAST.
1092  // Set GOLDEN to slot 3 and update slot 3.
1093  // Set all other buffer_idx to 0.
1094  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1095  ref_frame_config->ref_idx[i] = 0;
1096  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1097  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1098  ref_frame_config->refresh[3] = 1;
1099  } else if (layer_id->spatial_layer_id == 1) {
1100  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1101  // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1102  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1103  ref_frame_config->ref_idx[i] = 0;
1104  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1105  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1106  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1107  ref_frame_config->refresh[4] = 1;
1108  } else if (layer_id->spatial_layer_id == 2) {
1109  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1110  // GOLDEN to slot 4. No update.
1111  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1112  ref_frame_config->ref_idx[i] = 0;
1113  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1114  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1115  }
1116  }
1117  break;
1118  case 11:
1119  // Simulcast mode for 3 spatial and 3 temporal layers.
1120  // No inter-layer prediction, only prediction is temporal and single
1121  // reference (LAST).
1122  // No overlap in buffer slots between spatial layers. So for example,
1123  // SL0 only uses slots 0 and 1.
1124  // SL1 only uses slots 2 and 3.
1125  // SL2 only uses slots 4 and 5.
1126  // All 7 references for each inter-frame must only access buffer slots
1127  // for that spatial layer.
1128  // On key (super)frames: SL1 and SL2 must have no references set
1129  // and must refresh all the slots for that layer only (so 2 and 3
1130  // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1131  // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1132  // internally as Intra-only frames that allow that stream to be decoded.
1133  // These conditions will allow for each spatial stream to be
1134  // independently decodeable.
1135 
1136  // Initialize all references to 0 (don't use reference).
1137  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1138  ref_frame_config->reference[i] = 0;
1139  // Initialize as no refresh/update for all slots.
1140  for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1141  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1142  ref_frame_config->ref_idx[i] = 0;
1143 
1144  if (is_key_frame) {
1145  if (layer_id->spatial_layer_id == 0) {
1146  // Assign LAST/GOLDEN to slot 0/1.
1147  // Refresh slots 0 and 1 for SL0.
1148  // SL0: this will get set to KEY frame internally.
1149  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1150  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1151  ref_frame_config->refresh[0] = 1;
1152  ref_frame_config->refresh[1] = 1;
1153  } else if (layer_id->spatial_layer_id == 1) {
1154  // Assign LAST/GOLDEN to slot 2/3.
1155  // Refresh slots 2 and 3 for SL1.
1156  // This will get set to Intra-only frame internally.
1157  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1158  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1159  ref_frame_config->refresh[2] = 1;
1160  ref_frame_config->refresh[3] = 1;
1161  } else if (layer_id->spatial_layer_id == 2) {
1162  // Assign LAST/GOLDEN to slot 4/5.
1163  // Refresh slots 4 and 5 for SL2.
1164  // This will get set to Intra-only frame internally.
1165  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1166  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1167  ref_frame_config->refresh[4] = 1;
1168  ref_frame_config->refresh[5] = 1;
1169  }
1170  } else if (superframe_cnt % 4 == 0) {
1171  // Base temporal layer: TL0
1172  layer_id->temporal_layer_id = 0;
1173  if (layer_id->spatial_layer_id == 0) { // SL0
1174  // Reference LAST. Assign all references to either slot
1175  // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1176  // Update slot 0 (LAST).
1177  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1178  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1179  ref_frame_config->ref_idx[i] = 1;
1180  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1181  ref_frame_config->refresh[0] = 1;
1182  } else if (layer_id->spatial_layer_id == 1) { // SL1
1183  // Reference LAST. Assign all references to either slot
1184  // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1185  // Update slot 2 (LAST).
1186  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1187  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1188  ref_frame_config->ref_idx[i] = 3;
1189  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1190  ref_frame_config->refresh[2] = 1;
1191  } else if (layer_id->spatial_layer_id == 2) { // SL2
1192  // Reference LAST. Assign all references to either slot
1193  // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1194  // Update slot 4 (LAST).
1195  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1196  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1197  ref_frame_config->ref_idx[i] = 5;
1198  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1199  ref_frame_config->refresh[4] = 1;
1200  }
1201  } else if ((superframe_cnt - 1) % 4 == 0) {
1202  // First top temporal enhancement layer: TL2
1203  layer_id->temporal_layer_id = 2;
1204  if (layer_id->spatial_layer_id == 0) { // SL0
1205  // Reference LAST (slot 0). Assign other references to slot 1.
1206  // No update/refresh on any slots.
1207  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1208  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1209  ref_frame_config->ref_idx[i] = 1;
1210  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1211  } else if (layer_id->spatial_layer_id == 1) { // SL1
1212  // Reference LAST (slot 2). Assign other references to slot 3.
1213  // No update/refresh on any slots.
1214  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1215  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1216  ref_frame_config->ref_idx[i] = 3;
1217  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1218  } else if (layer_id->spatial_layer_id == 2) { // SL2
1219  // Reference LAST (slot 4). Assign other references to slot 5.
1220  // No update/refresh on any slots.
1221  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1222  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1223  ref_frame_config->ref_idx[i] = 5;
1224  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1225  }
1226  } else if ((superframe_cnt - 2) % 4 == 0) {
1227  // Middle temporal enhancement layer: TL1
1228  layer_id->temporal_layer_id = 1;
1229  if (layer_id->spatial_layer_id == 0) { // SL0
1230  // Reference LAST (slot 0).
1231  // Set GOLDEN to slot 1 and update slot 1.
1232  // This will be used as reference for next TL2.
1233  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1234  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1235  ref_frame_config->ref_idx[i] = 1;
1236  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1237  ref_frame_config->refresh[1] = 1;
1238  } else if (layer_id->spatial_layer_id == 1) { // SL1
1239  // Reference LAST (slot 2).
1240  // Set GOLDEN to slot 3 and update slot 3.
1241  // This will be used as reference for next TL2.
1242  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1243  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1244  ref_frame_config->ref_idx[i] = 3;
1245  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1246  ref_frame_config->refresh[3] = 1;
1247  } else if (layer_id->spatial_layer_id == 2) { // SL2
1248  // Reference LAST (slot 4).
1249  // Set GOLDEN to slot 5 and update slot 5.
1250  // This will be used as reference for next TL2.
1251  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1252  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1253  ref_frame_config->ref_idx[i] = 5;
1254  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1255  ref_frame_config->refresh[5] = 1;
1256  }
1257  } else if ((superframe_cnt - 3) % 4 == 0) {
1258  // Second top temporal enhancement layer: TL2
1259  layer_id->temporal_layer_id = 2;
1260  if (layer_id->spatial_layer_id == 0) { // SL0
1261  // Reference LAST (slot 1). Assign other references to slot 0.
1262  // No update/refresh on any slots.
1263  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1264  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1265  ref_frame_config->ref_idx[i] = 0;
1266  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1267  } else if (layer_id->spatial_layer_id == 1) { // SL1
1268  // Reference LAST (slot 3). Assign other references to slot 2.
1269  // No update/refresh on any slots.
1270  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1271  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1272  ref_frame_config->ref_idx[i] = 2;
1273  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1274  } else if (layer_id->spatial_layer_id == 2) { // SL2
1275  // Reference LAST (slot 5). Assign other references to slot 4.
1276  // No update/refresh on any slots.
1277  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1278  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1279  ref_frame_config->ref_idx[i] = 4;
1280  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1281  }
1282  }
1283  if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1284  // Always reference GOLDEN (inter-layer prediction).
1285  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1286  if (ksvc_mode) {
1287  // KSVC: only keep the inter-layer reference (GOLDEN) for
1288  // superframes whose base is key.
1289  if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1290  }
1291  if (is_key_frame && layer_id->spatial_layer_id > 1) {
1292  // On superframes whose base is key: remove LAST to avoid prediction
1293  // off layer two levels below.
1294  ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1295  }
1296  }
1297  // For 3 spatial layer case 8 (where there is free buffer slot):
1298  // allow for top spatial layer to use additional temporal reference.
1299  // Additional reference is only updated on base temporal layer, every
1300  // 10 TL0 frames here.
1301  if (!simulcast_mode && enable_longterm_temporal_ref &&
1302  layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1303  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1304  if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1305  if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1306  ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1307  }
1308  break;
1309  default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1310  }
1311 }
1312 
1313 #if CONFIG_AV1_DECODER
1314 // Returns whether there is a mismatch between the encoder's new frame and the
1315 // decoder's new frame.
1316 static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1317  const int frames_out) {
1318  aom_image_t enc_img, dec_img;
1319  int mismatch = 0;
1320 
1321  /* Get the internal new frame */
1324 
1325 #if CONFIG_AV1_HIGHBITDEPTH
1326  if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1327  (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1328  if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1329  aom_image_t enc_hbd_img;
1330  aom_img_alloc(
1331  &enc_hbd_img,
1332  static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1333  enc_img.d_w, enc_img.d_h, 16);
1334  aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1335  enc_img = enc_hbd_img;
1336  }
1337  if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1338  aom_image_t dec_hbd_img;
1339  aom_img_alloc(
1340  &dec_hbd_img,
1341  static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1342  dec_img.d_w, dec_img.d_h, 16);
1343  aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1344  dec_img = dec_hbd_img;
1345  }
1346  }
1347 #endif
1348 
1349  if (!aom_compare_img(&enc_img, &dec_img)) {
1350  int y[4], u[4], v[4];
1351 #if CONFIG_AV1_HIGHBITDEPTH
1352  if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1353  aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1354  } else {
1355  aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1356  }
1357 #else
1358  aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1359 #endif
1360  fprintf(stderr,
1361  "Encode/decode mismatch on frame %d at"
1362  " Y[%d, %d] {%d/%d},"
1363  " U[%d, %d] {%d/%d},"
1364  " V[%d, %d] {%d/%d}\n",
1365  frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1366  v[1], v[2], v[3]);
1367  mismatch = 1;
1368  }
1369 
1370  aom_img_free(&enc_img);
1371  aom_img_free(&dec_img);
1372  return mismatch;
1373 }
1374 #endif // CONFIG_AV1_DECODER
1375 
// Accumulated PSNR statistics for a stream.
// Every array has two entries: entry 0 is the one show_psnr() reads, and
// entry 1 is reserved for high bitdepth.
struct psnr_stats {
  // Summed squared error; passed to sse_to_psnr() for the overall figure.
  uint64_t psnr_sse_total[2];
  // Summed sample count; passed to sse_to_psnr() for the overall figure.
  uint64_t psnr_samples_total[2];
  // Four running PSNR sums per entry, reported as Avg/Y/U/V.
  double psnr_totals[2][4];
  // Divisor applied to psnr_totals when reporting.
  int psnr_count[2];
};
1383 
1384 static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1385  double ovpsnr;
1386 
1387  if (!psnr_stream->psnr_count[0]) return;
1388 
1389  fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1390  ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1391  (double)psnr_stream->psnr_sse_total[0]);
1392  fprintf(stderr, " %.3f", ovpsnr);
1393 
1394  for (int i = 0; i < 4; i++) {
1395  fprintf(stderr, " %.3f",
1396  psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1397  }
1398  fprintf(stderr, "\n");
1399 }
1400 
1401 static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1402  const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1403  aom::AV1RateControlRtcConfig rc_cfg;
1404  rc_cfg.width = cfg.g_w;
1405  rc_cfg.height = cfg.g_h;
1406  rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1407  rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1408  rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1409  rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1410  rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1411  rc_cfg.buf_sz = cfg.rc_buf_sz;
1412  rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1413  rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1414  // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1415  rc_cfg.max_intra_bitrate_pct = 300;
1416  rc_cfg.framerate = cfg.g_timebase.den;
1417  // TODO(jianj): Add suppor for SVC.
1418  rc_cfg.ss_number_layers = 1;
1419  rc_cfg.ts_number_layers = 1;
1420  rc_cfg.scaling_factor_num[0] = 1;
1421  rc_cfg.scaling_factor_den[0] = 1;
1422  rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1423  rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1424  rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1425  rc_cfg.aq_mode = app_input.aq_mode;
1426 
1427  return rc_cfg;
1428 }
1429 
// Maps an internal qindex to the smallest external quantizer level whose
// table entry is at least that qindex.
//
// qindex: value on the internal 0-255 scale.
// Returns a level in 0..63; inputs above the last table entry map to 63 and
// inputs at or below the first entry map to 0.
static int qindex_to_quantizer(int qindex) {
  // Entry k is the internal 0-255 qindex that corresponds to the external
  // 0-63 quantizer value k.
  static const int kQindexForLevel[] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  const int last_level =
      (int)(sizeof(kQindexForLevel) / sizeof(kQindexForLevel[0])) - 1;

  // Advance past every entry below the requested qindex, stopping at the last
  // level so out-of-range inputs saturate there.
  int level = 0;
  while (level < last_level && kQindexForLevel[level] < qindex) {
    ++level;
  }
  return level;
}
1445 
1446 static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1447  aom_codec_ctx_t *codec, int frame_cnt) {
1448  aom_active_map_t map = { 0, 0, 0 };
1449 
1450  map.rows = (cfg->g_h + 15) / 16;
1451  map.cols = (cfg->g_w + 15) / 16;
1452 
1453  map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1454  if (!map.active_map) die("Failed to allocate active map");
1455 
1456  // Example map for testing.
1457  for (unsigned int i = 0; i < map.rows; ++i) {
1458  for (unsigned int j = 0; j < map.cols; ++j) {
1459  int index = map.cols * i + j;
1460  map.active_map[index] = 1;
1461  if (frame_cnt < 300) {
1462  if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1463  } else if (frame_cnt >= 300) {
1464  if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1465  }
1466  }
1467  }
1468 
1469  if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1470  die_codec(codec, "Failed to set active map");
1471 
1472  free(map.active_map);
1473 }
1474 
1475 int main(int argc, const char **argv) {
1476  AppInput app_input;
1477  AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1478  FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1479  AvxVideoWriter *total_layer_file = NULL;
1480  FILE *total_layer_obu_file = NULL;
1481  aom_codec_enc_cfg_t cfg;
1482  int frame_cnt = 0;
1483  aom_image_t raw;
1484  int frame_avail;
1485  int got_data = 0;
1486  int flags = 0;
1487  int i;
1488  int pts = 0; // PTS starts at 0.
1489  int frame_duration = 1; // 1 timebase tick per frame.
1490  aom_svc_layer_id_t layer_id;
1491  aom_svc_params_t svc_params;
1492  aom_svc_ref_frame_config_t ref_frame_config;
1493  aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1494 
1495 #if CONFIG_INTERNAL_STATS
1496  FILE *stats_file = fopen("opsnr.stt", "a");
1497  if (stats_file == NULL) {
1498  die("Cannot open opsnr.stt\n");
1499  }
1500 #endif
1501 #if CONFIG_AV1_DECODER
1502  aom_codec_ctx_t decoder;
1503 #endif
1504 
1505  struct RateControlMetrics rc;
1506  int64_t cx_time = 0;
1507  int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
1508  int frame_cnt_layer[AOM_MAX_LAYERS];
1509  double sum_bitrate = 0.0;
1510  double sum_bitrate2 = 0.0;
1511  double framerate = 30.0;
1512  int use_svc_control = 1;
1513  int set_err_resil_frame = 0;
1514  int test_changing_bitrate = 0;
1515  zero(rc.layer_target_bitrate);
1516  memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1517  memset(&app_input, 0, sizeof(AppInput));
1518  memset(&svc_params, 0, sizeof(svc_params));
1519 
1520  // Flag to test dynamic scaling of source frames for single
1521  // spatial stream, using the scaling_mode control.
1522  const int test_dynamic_scaling_single_layer = 0;
1523 
1524  // Flag to test setting speed per layer.
1525  const int test_speed_per_layer = 0;
1526 
1527  // Flag for testing active maps.
1528  const int test_active_maps = 0;
1529 
1530  /* Setup default input stream settings */
1531  app_input.input_ctx.framerate.numerator = 30;
1532  app_input.input_ctx.framerate.denominator = 1;
1533  app_input.input_ctx.only_i420 = 0;
1534  app_input.input_ctx.bit_depth = AOM_BITS_8;
1535  app_input.speed = 7;
1536  exec_name = argv[0];
1537 
1538  // start with default encoder configuration
1541  if (res != AOM_CODEC_OK) {
1542  die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1543  }
1544 
1545  // Real time parameters.
1547 
1548  cfg.rc_end_usage = AOM_CBR;
1549  cfg.rc_min_quantizer = 2;
1550  cfg.rc_max_quantizer = 52;
1551  cfg.rc_undershoot_pct = 50;
1552  cfg.rc_overshoot_pct = 50;
1553  cfg.rc_buf_initial_sz = 600;
1554  cfg.rc_buf_optimal_sz = 600;
1555  cfg.rc_buf_sz = 1000;
1556  cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
1557  cfg.g_lag_in_frames = 0;
1558  cfg.kf_mode = AOM_KF_AUTO;
1559 
1560  parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1561 
1562  int ts_number_layers = svc_params.number_temporal_layers;
1563  int ss_number_layers = svc_params.number_spatial_layers;
1564 
1565  unsigned int width = cfg.g_w;
1566  unsigned int height = cfg.g_h;
1567 
1568  if (app_input.layering_mode >= 0) {
1569  if (ts_number_layers !=
1570  mode_to_num_temporal_layers[app_input.layering_mode] ||
1571  ss_number_layers !=
1572  mode_to_num_spatial_layers[app_input.layering_mode]) {
1573  die("Number of layers doesn't match layering mode.");
1574  }
1575  }
1576 
1577  // Y4M reader has its own allocation.
1578  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
1579  if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1580  die("Failed to allocate image (%dx%d)", width, height);
1581  }
1582  }
1583 
1584  aom_codec_iface_t *encoder = aom_codec_av1_cx();
1585 
1586  memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1587  sizeof(svc_params.layer_target_bitrate));
1588 
1589  unsigned int total_rate = 0;
1590  for (i = 0; i < ss_number_layers; i++) {
1591  total_rate +=
1592  svc_params
1593  .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1594  }
1595  if (total_rate != cfg.rc_target_bitrate) {
1596  die("Incorrect total target bitrate");
1597  }
1598 
1599  svc_params.framerate_factor[0] = 1;
1600  if (ts_number_layers == 2) {
1601  svc_params.framerate_factor[0] = 2;
1602  svc_params.framerate_factor[1] = 1;
1603  } else if (ts_number_layers == 3) {
1604  svc_params.framerate_factor[0] = 4;
1605  svc_params.framerate_factor[1] = 2;
1606  svc_params.framerate_factor[2] = 1;
1607  }
1608 
1609  if (app_input.input_ctx.file_type == FILE_TYPE_Y4M) {
1610  // Override these settings with the info from Y4M file.
1611  cfg.g_w = app_input.input_ctx.width;
1612  cfg.g_h = app_input.input_ctx.height;
1613  // g_timebase is the reciprocal of frame rate.
1614  cfg.g_timebase.num = app_input.input_ctx.framerate.denominator;
1615  cfg.g_timebase.den = app_input.input_ctx.framerate.numerator;
1616  }
1617  framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1618  set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1619 
1620  AvxVideoInfo info;
1621  info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1622  info.frame_width = cfg.g_w;
1623  info.frame_height = cfg.g_h;
1624  info.time_base.numerator = cfg.g_timebase.num;
1625  info.time_base.denominator = cfg.g_timebase.den;
1626  // Open an output file for each stream.
1627  for (int sl = 0; sl < ss_number_layers; ++sl) {
1628  for (int tl = 0; tl < ts_number_layers; ++tl) {
1629  i = sl * ts_number_layers + tl;
1630  char file_name[PATH_MAX];
1631  snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1632  app_input.output_filename, i);
1633  if (app_input.output_obu) {
1634  obu_files[i] = fopen(file_name, "wb");
1635  if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1636  } else {
1637  outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1638  if (!outfile[i]) die("Failed to open %s for writing", file_name);
1639  }
1640  }
1641  }
1642  if (app_input.output_obu) {
1643  total_layer_obu_file = fopen(app_input.output_filename, "wb");
1644  if (!total_layer_obu_file)
1645  die("Failed to open %s for writing", app_input.output_filename);
1646  } else {
1647  total_layer_file =
1648  aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1649  if (!total_layer_file)
1650  die("Failed to open %s for writing", app_input.output_filename);
1651  }
1652 
1653  // Initialize codec.
1654  aom_codec_ctx_t codec;
1655  aom_codec_flags_t flag = 0;
1657  flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1658  if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1659  die_codec(&codec, "Failed to initialize encoder");
1660 
1661 #if CONFIG_AV1_DECODER
1662  if (app_input.decode) {
1663  if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1664  die_codec(&decoder, "Failed to initialize decoder");
1665  }
1666 #endif
1667 
1668  aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1669  aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
1684 
1685  // Settings to reduce key frame encoding time.
1691 
1693 
1694  aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
1695  if (app_input.tune_content == AOM_CONTENT_SCREEN) {
1698  // INTRABC is currently disabled for rt mode, as it's too slow.
1700  }
1701 
1702  if (app_input.use_external_rc) {
1704  }
1705 
1707 
1710 
1712 
1713  svc_params.number_spatial_layers = ss_number_layers;
1714  svc_params.number_temporal_layers = ts_number_layers;
1715  for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1716  svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
1717  svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
1718  }
1719  for (i = 0; i < ss_number_layers; ++i) {
1720  svc_params.scaling_factor_num[i] = 1;
1721  svc_params.scaling_factor_den[i] = 1;
1722  }
1723  if (ss_number_layers == 2) {
1724  svc_params.scaling_factor_num[0] = 1;
1725  svc_params.scaling_factor_den[0] = 2;
1726  } else if (ss_number_layers == 3) {
1727  svc_params.scaling_factor_num[0] = 1;
1728  svc_params.scaling_factor_den[0] = 4;
1729  svc_params.scaling_factor_num[1] = 1;
1730  svc_params.scaling_factor_den[1] = 2;
1731  }
1732  aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
1733  // TODO(aomedia:3032): Configure KSVC in fixed mode.
1734 
1735  // This controls the maximum target size of the key frame.
1736  // For generating smaller key frames, use a smaller max_intra_size_pct
1737  // value, like 100 or 200.
1738  {
1739  const int max_intra_size_pct = 300;
1741  max_intra_size_pct);
1742  }
1743 
1744  for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
1745  cx_time_layer[lx] = 0;
1746  frame_cnt_layer[lx] = 0;
1747  }
1748 
1749  std::unique_ptr<aom::AV1RateControlRTC> rc_api;
1750  if (app_input.use_external_rc) {
1751  const aom::AV1RateControlRtcConfig rc_cfg =
1752  create_rtc_rc_config(cfg, app_input);
1753  rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
1754  }
1755 
1756  frame_avail = 1;
1757  struct psnr_stats psnr_stream;
1758  memset(&psnr_stream, 0, sizeof(psnr_stream));
1759  while (frame_avail || got_data) {
1760  struct aom_usec_timer timer;
1761  frame_avail = read_frame(&(app_input.input_ctx), &raw);
1762  // Loop over spatial layers.
1763  for (int slx = 0; slx < ss_number_layers; slx++) {
1764  aom_codec_iter_t iter = NULL;
1765  const aom_codec_cx_pkt_t *pkt;
1766  int layer = 0;
1767  // Flag for superframe whose base is key.
1768  int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
1769  // For flexible mode:
1770  if (app_input.layering_mode >= 0) {
1771  // Set the reference/update flags, layer_id, and reference_map
1772  // buffer index.
1773  set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
1774  &ref_frame_config, &ref_frame_comp_pred,
1775  &use_svc_control, slx, is_key_frame,
1776  (app_input.layering_mode == 10), app_input.speed);
1777  aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1778  if (use_svc_control) {
1780  &ref_frame_config);
1782  &ref_frame_comp_pred);
1783  }
1784  // Set the speed per layer.
1785  if (test_speed_per_layer) {
1786  int speed_per_layer = 10;
1787  if (layer_id.spatial_layer_id == 0) {
1788  if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
1789  if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
1790  if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
1791  } else if (layer_id.spatial_layer_id == 1) {
1792  if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
1793  if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
1794  if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
1795  } else if (layer_id.spatial_layer_id == 2) {
1796  if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
1797  if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
1798  if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
1799  }
1800  aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
1801  }
1802  } else {
1803  // Only up to 3 temporal layers supported in fixed mode.
1804  // Only need to set spatial and temporal layer_id: reference
1805  // prediction, refresh, and buffer_idx are set internally.
1806  layer_id.spatial_layer_id = slx;
1807  layer_id.temporal_layer_id = 0;
1808  if (ts_number_layers == 2) {
1809  layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
1810  } else if (ts_number_layers == 3) {
1811  if (frame_cnt % 2 != 0)
1812  layer_id.temporal_layer_id = 2;
1813  else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
1814  layer_id.temporal_layer_id = 1;
1815  }
1816  aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1817  }
1818 
1819  if (set_err_resil_frame && cfg.g_error_resilient == 0) {
1820  // Set error_resilient per frame: off/0 for base layer and
1821  // on/1 for enhancement layer frames.
1822  // Note that this can only be done on the fly/per-frame/layer
1823  // if the config error_resilience is off/0. See the logic for updating
1824  // in set_encoder_config():
1825  // tool_cfg->error_resilient_mode =
1826  // cfg->g_error_resilient | extra_cfg->error_resilient_mode;
1827  const int err_resil_mode =
1828  layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
1830  err_resil_mode);
1831  }
1832 
1833  layer = slx * ts_number_layers + layer_id.temporal_layer_id;
1834  if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
1835 
1836  if (test_dynamic_scaling_single_layer) {
1837  // Example to scale source down by 2x2, then 4x4, and then back up to
1838  // 2x2, and then back to original.
1839  int frame_2x2 = 200;
1840  int frame_4x4 = 400;
1841  int frame_2x2up = 600;
1842  int frame_orig = 800;
1843  if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
1844  // Scale source down by 2x2.
1845  struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1846  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1847  } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
1848  // Scale source down by 4x4.
1849  struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
1850  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1851  } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
1852  // Source back up to 2x2.
1853  struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1854  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1855  } else if (frame_cnt >= frame_orig) {
1856  // Source back up to original resolution (no scaling).
1857  struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
1858  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1859  }
1860  if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
1861  frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
1862  // For dynamic resize testing on single layer: refresh all references
1863  // on the resized frame: this is to avoid decode error:
1864  // if resize goes down by >= 4x4 then libaom decoder will throw an
1865  // error that some reference (even though not used) is beyond the
1866  // limit size (must be smaller than 4x4).
1867  for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
1868  if (use_svc_control) {
1870  &ref_frame_config);
1872  &ref_frame_comp_pred);
1873  }
1874  }
1875  }
1876 
1877  // Change target_bitrate every other frame.
1878  if (test_changing_bitrate && frame_cnt % 2 == 0) {
1879  if (frame_cnt < 500)
1880  cfg.rc_target_bitrate += 10;
1881  else
1882  cfg.rc_target_bitrate -= 10;
1883  // Do big increase and decrease.
1884  if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
1885  if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
1886  if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
1887  // Call change_config, or bypass with new control.
1888  // res = aom_codec_enc_config_set(&codec, &cfg);
1890  cfg.rc_target_bitrate))
1891  die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
1892  }
1893 
1894  if (rc_api) {
1895  aom::AV1FrameParamsRTC frame_params;
1896  // TODO(jianj): Add support for SVC.
1897  frame_params.spatial_layer_id = 0;
1898  frame_params.temporal_layer_id = 0;
1899  frame_params.frame_type =
1900  is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
1901  rc_api->ComputeQP(frame_params);
1902  const int current_qp = rc_api->GetQP();
1904  qindex_to_quantizer(current_qp))) {
1905  die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
1906  }
1907  }
1908 
1909  if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
1910 
1911  // Do the layer encode.
1912  aom_usec_timer_start(&timer);
1913  if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
1914  die_codec(&codec, "Failed to encode frame");
1915  aom_usec_timer_mark(&timer);
1916  cx_time += aom_usec_timer_elapsed(&timer);
1917  cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
1918  frame_cnt_layer[layer] += 1;
1919 
1920  // Get the high motion content flag.
1921  int content_flag = 0;
1923  &content_flag)) {
1924  die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC");
1925  }
1926 
1927  got_data = 0;
1928  // For simulcast (mode 11): write out each spatial layer to the file.
1929  int ss_layers_write = (app_input.layering_mode == 11)
1930  ? layer_id.spatial_layer_id + 1
1931  : ss_number_layers;
1932  while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
1933  switch (pkt->kind) {
1935  for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
1936  ++sl) {
1937  for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
1938  ++tl) {
1939  int j = sl * ts_number_layers + tl;
1940  if (app_input.output_obu) {
1941  fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1942  obu_files[j]);
1943  } else {
1944  aom_video_writer_write_frame(
1945  outfile[j],
1946  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1947  pkt->data.frame.sz, pts);
1948  }
1949  if (sl == layer_id.spatial_layer_id)
1950  rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
1951  }
1952  }
1953  got_data = 1;
1954  // Write everything into the top layer.
1955  if (app_input.output_obu) {
1956  fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1957  total_layer_obu_file);
1958  } else {
1959  aom_video_writer_write_frame(
1960  total_layer_file,
1961  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1962  pkt->data.frame.sz, pts);
1963  }
1964  // Keep count of rate control stats per layer (for non-key).
1965  if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
1966  int j = layer_id.spatial_layer_id * ts_number_layers +
1967  layer_id.temporal_layer_id;
1968  assert(j >= 0);
1969  rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
1970  rc.layer_avg_rate_mismatch[j] +=
1971  fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
1972  rc.layer_pfb[j];
1973  if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
1974  }
1975 
1976  if (rc_api) {
1977  rc_api->PostEncodeUpdate(pkt->data.frame.sz);
1978  }
1979  // Update for short-time encoding bitrate states, for moving window
1980  // of size rc->window, shifted by rc->window / 2.
1981  // Ignore first window segment, due to key frame.
1982  // For spatial layers: only do this for top/highest SL.
1983  if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
1984  sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1985  rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
1986  if (frame_cnt % rc.window_size == 0) {
1987  rc.window_count += 1;
1988  rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
1989  rc.variance_st_encoding_bitrate +=
1990  (sum_bitrate / rc.window_size) *
1991  (sum_bitrate / rc.window_size);
1992  sum_bitrate = 0.0;
1993  }
1994  }
1995  // Second shifted window.
1996  if (frame_cnt > rc.window_size + rc.window_size / 2 &&
1997  slx == ss_number_layers - 1) {
1998  sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1999  if (frame_cnt > 2 * rc.window_size &&
2000  frame_cnt % rc.window_size == 0) {
2001  rc.window_count += 1;
2002  rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
2003  rc.variance_st_encoding_bitrate +=
2004  (sum_bitrate2 / rc.window_size) *
2005  (sum_bitrate2 / rc.window_size);
2006  sum_bitrate2 = 0.0;
2007  }
2008  }
2009 
2010 #if CONFIG_AV1_DECODER
2011  if (app_input.decode) {
2012  if (aom_codec_decode(
2013  &decoder,
2014  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2015  pkt->data.frame.sz, NULL))
2016  die_codec(&decoder, "Failed to decode frame");
2017  }
2018 #endif
2019 
2020  break;
2021  case AOM_CODEC_PSNR_PKT:
2022  if (app_input.show_psnr) {
2023  psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2024  psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2025  for (int plane = 0; plane < 4; plane++) {
2026  psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2027  }
2028  psnr_stream.psnr_count[0]++;
2029  }
2030  break;
2031  default: break;
2032  }
2033  }
2034 #if CONFIG_AV1_DECODER
2035  if (got_data && app_input.decode) {
2036  // Don't look for mismatch on top spatial and top temporal layers as
2037  // they are non reference frames.
2038  if ((ss_number_layers > 1 || ts_number_layers > 1) &&
2039  !(layer_id.temporal_layer_id > 0 &&
2040  layer_id.temporal_layer_id == ts_number_layers - 1)) {
2041  if (test_decode(&codec, &decoder, frame_cnt)) {
2042 #if CONFIG_INTERNAL_STATS
2043  fprintf(stats_file, "First mismatch occurred in frame %d\n",
2044  frame_cnt);
2045  fclose(stats_file);
2046 #endif
2047  fatal("Mismatch seen");
2048  }
2049  }
2050  }
2051 #endif
2052  } // loop over spatial layers
2053  ++frame_cnt;
2054  pts += frame_duration;
2055  }
2056 
2057  close_input_file(&(app_input.input_ctx));
2058  printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2059  ts_number_layers);
2060 
2061  printf("\n");
2062  for (int slx = 0; slx < ss_number_layers; slx++)
2063  for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2064  int lx = slx * ts_number_layers + tlx;
2065  printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2066  slx, tlx, frame_cnt_layer[lx],
2067  (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2068  1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2069  }
2070 
2071  printf("\n");
2072  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2073  frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2074  1000000 * (double)frame_cnt / (double)cx_time);
2075 
2076  if (app_input.show_psnr) {
2077  show_psnr(&psnr_stream, 255.0);
2078  }
2079 
2080  if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2081 
2082 #if CONFIG_AV1_DECODER
2083  if (app_input.decode) {
2084  if (aom_codec_destroy(&decoder))
2085  die_codec(&decoder, "Failed to destroy decoder");
2086  }
2087 #endif
2088 
2089 #if CONFIG_INTERNAL_STATS
2090  fprintf(stats_file, "No mismatch detected in recon buffers\n");
2091  fclose(stats_file);
2092 #endif
2093 
2094  // Try to rewrite the output file headers with the actual frame count.
2095  for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2096  aom_video_writer_close(outfile[i]);
2097  aom_video_writer_close(total_layer_file);
2098 
2099  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
2100  aom_img_free(&raw);
2101  }
2102  return EXIT_SUCCESS;
2103 }
Describes the decoder algorithm interface to applications.
Describes the encoder algorithm interface to applications.
@ AOM_CSP_UNKNOWN
Definition: aom_image.h:143
enum aom_chroma_sample_position aom_chroma_sample_position_t
List of chroma sample positions.
aom_image_t * aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
#define AOM_IMG_FMT_HIGHBITDEPTH
Definition: aom_image.h:38
@ AOM_IMG_FMT_I420
Definition: aom_image.h:45
enum aom_img_fmt aom_img_fmt_t
List of supported image formats.
void aom_img_free(aom_image_t *img)
Close an image descriptor.
Provides definitions for using AOM or AV1 encoder algorithm within the aom Codec Interface.
#define AOM_MAX_LAYERS
Definition: aomcx.h:1693
aom_codec_iface_t * aom_codec_av1_cx(void)
The interface to the AV1 encoder.
#define AOM_MAX_TS_LAYERS
Definition: aomcx.h:1695
@ AOM_FULL_SUPERFRAME_DROP
Definition: aomcx.h:1742
@ AV1E_SET_BITRATE_ONE_PASS_CBR
Codec control to set the target bitrate in kilobits per second, unsigned int parameter....
Definition: aomcx.h:1528
@ AV1E_SET_ENABLE_SMOOTH_INTRA
Codec control function to turn on / off smooth intra modes usage, int parameter.
Definition: aomcx.h:1070
@ AV1E_SET_ENABLE_TPL_MODEL
Codec control function to enable RDO modulated by frame temporal dependency, unsigned int parameter.
Definition: aomcx.h:408
@ AV1E_SET_AQ_MODE
Codec control function to set adaptive quantization mode, unsigned int parameter.
Definition: aomcx.h:468
@ AV1E_SET_SVC_LAYER_ID
Codec control function to set the layer id, aom_svc_layer_id_t* parameter.
Definition: aomcx.h:1276
@ AV1E_SET_SVC_REF_FRAME_CONFIG
Codec control function to set reference frame config: the ref_idx and the refresh flags for each buff...
Definition: aomcx.h:1287
@ AV1E_SET_TUNE_CONTENT
Codec control function to set content type, aom_tune_content parameter.
Definition: aomcx.h:497
@ AV1E_SET_CDF_UPDATE_MODE
Codec control function to set CDF update mode, unsigned int parameter.
Definition: aomcx.h:506
@ AV1E_SET_ENABLE_ANGLE_DELTA
Codec control function to turn on/off intra angle delta, int parameter.
Definition: aomcx.h:1117
@ AV1E_SET_MV_COST_UPD_FREQ
Control to set frequency of the cost updates for motion vectors, unsigned int parameter.
Definition: aomcx.h:1254
@ AV1E_SET_INTRA_DEFAULT_TX_ONLY
Control to use default tx type only for intra modes, int parameter.
Definition: aomcx.h:1203
@ AV1E_SET_SVC_REF_FRAME_COMP_PRED
Codec control function to set reference frame compound prediction. aom_svc_ref_frame_comp_pred_t* par...
Definition: aomcx.h:1392
@ AV1E_SET_ENABLE_INTRABC
Codec control function to turn on/off intra block copy mode, int parameter.
Definition: aomcx.h:1113
@ AV1E_SET_ENABLE_WARPED_MOTION
Codec control function to turn on / off warped motion usage at sequence level, int parameter.
Definition: aomcx.h:1038
@ AV1E_SET_RTC_EXTERNAL_RC
Codec control function to set flag for rate control used by external encoders.
Definition: aomcx.h:1427
@ AV1E_SET_COEFF_COST_UPD_FREQ
Control to set frequency of the cost updates for coefficients, unsigned int parameter.
Definition: aomcx.h:1234
@ AV1E_SET_ENABLE_CDEF
Codec control function to encode with CDEF, unsigned int parameter.
Definition: aomcx.h:670
@ AOME_SET_ACTIVEMAP
Codec control function to pass an Active map to encoder, aom_active_map_t* parameter.
Definition: aomcx.h:190
@ AV1E_SET_DV_COST_UPD_FREQ
Control to set frequency of the cost updates for intrabc motion vectors, unsigned int parameter.
Definition: aomcx.h:1358
@ AV1E_SET_SVC_FRAME_DROP_MODE
Codec control to set the frame drop mode for SVC, unsigned int parameter. The valid values are consta...
Definition: aomcx.h:1541
@ AV1E_SET_SVC_PARAMS
Codec control function to set SVC parameters, aom_svc_params_t* parameter.
Definition: aomcx.h:1281
@ AV1E_SET_ENABLE_FILTER_INTRA
Codec control function to turn on / off filter intra usage at sequence level, int parameter.
Definition: aomcx.h:1059
@ AV1E_SET_ENABLE_PALETTE
Codec control function to turn on/off palette mode, int parameter.
Definition: aomcx.h:1109
@ AV1E_SET_ENABLE_CFL_INTRA
Codec control function to turn on / off CFL uv intra mode usage, int parameter.
Definition: aomcx.h:1088
@ AOME_SET_MAX_INTRA_BITRATE_PCT
Codec control function to set max data rate for intra frames, unsigned int parameter.
Definition: aomcx.h:306
@ AV1E_SET_ERROR_RESILIENT_MODE
Codec control function to enable error_resilient_mode, int parameter.
Definition: aomcx.h:442
@ AV1E_SET_ENABLE_OBMC
Codec control function to predict with OBMC mode, unsigned int parameter.
Definition: aomcx.h:697
@ AV1E_SET_AUTO_TILES
Codec control to set auto tiling, unsigned int parameter. Value of 1 means encoder will set number of...
Definition: aomcx.h:1549
@ AV1E_SET_LOOPFILTER_CONTROL
Codec control to control loop filter.
Definition: aomcx.h:1407
@ AOME_SET_SCALEMODE
Codec control function to set encoder scaling mode for the next frame to be coded,...
Definition: aomcx.h:197
@ AV1E_SET_ENABLE_ORDER_HINT
Codec control function to turn on / off frame order hint (int parameter). Affects: joint compound mod...
Definition: aomcx.h:865
@ AV1E_SET_DELTAQ_MODE
Codec control function to set the delta q mode, unsigned int parameter.
Definition: aomcx.h:1131
@ AV1E_SET_POSTENCODE_DROP_RTC
Codec control to enable post encode frame drop for RTC encoding, int parameter.
Definition: aomcx.h:1565
@ AV1E_SET_ENABLE_GLOBAL_MOTION
Codec control function to turn on / off global motion usage for a sequence, int parameter.
Definition: aomcx.h:1028
@ AOME_SET_CPUUSED
Codec control function to set encoder internal speed settings, int parameter.
Definition: aomcx.h:220
@ AV1E_GET_HIGH_MOTION_CONTENT_SCREEN_RTC
Codec control to get the high motion content flag, used for screen content realtime (RTC) encoding,...
Definition: aomcx.h:1556
@ AV1E_SET_GF_CBR_BOOST_PCT
Boost percentage for Golden Frame in CBR mode, unsigned int parameter.
Definition: aomcx.h:339
@ AV1E_SET_QUANTIZER_ONE_PASS
Codec control to set quantizer for the next frame, int parameter.
Definition: aomcx.h:1490
@ AV1E_SET_MODE_COST_UPD_FREQ
Control to set frequency of the cost updates for mode, unsigned int parameter.
Definition: aomcx.h:1244
@ AV1E_SET_MAX_CONSEC_FRAME_DROP_MS_CBR
Codec control to set the maximum number of consecutive frame drops, in units of time (milliseconds),...
Definition: aomcx.h:1571
@ AV1_GET_NEW_FRAME_IMAGE
Codec control function to get a pointer to the new frame.
Definition: aom.h:70
const char * aom_codec_iface_name(aom_codec_iface_t *iface)
Return the name for a given interface.
enum aom_bit_depth aom_bit_depth_t
Bit depth for codec. This enumeration determines the bit depth of the codec.
aom_codec_err_t aom_codec_control(aom_codec_ctx_t *ctx, int ctrl_id,...)
Algorithm Control.
long aom_codec_flags_t
Initialization-time Feature Enabling.
Definition: aom_codec.h:228
const struct aom_codec_iface aom_codec_iface_t
Codec interface structure.
Definition: aom_codec.h:254
const char * aom_codec_err_to_string(aom_codec_err_t err)
Convert error number to printable string.
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx)
Destroy a codec instance.
aom_codec_err_t
Algorithm return codes.
Definition: aom_codec.h:155
#define AOM_CODEC_CONTROL_TYPECHECKED(ctx, id, data)
aom_codec_control wrapper macro (adds type-checking, less flexible)
Definition: aom_codec.h:525
const void * aom_codec_iter_t
Iterator.
Definition: aom_codec.h:288
#define AOM_FRAME_IS_KEY
Definition: aom_codec.h:271
@ AOM_BITS_8
Definition: aom_codec.h:319
@ AOM_BITS_10
Definition: aom_codec.h:320
@ AOM_CODEC_INVALID_PARAM
An application-supplied parameter is not valid.
Definition: aom_codec.h:200
@ AOM_CODEC_MEM_ERROR
Memory operation failed.
Definition: aom_codec.h:163
@ AOM_CODEC_OK
Operation completed without error.
Definition: aom_codec.h:157
aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data, size_t data_sz, void *user_priv)
Decode data.
#define aom_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_dec_init_ver()
Definition: aom_decoder.h:129
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags)
Encode a frame.
#define aom_codec_enc_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_enc_init_ver()
Definition: aom_encoder.h:941
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg, unsigned int usage)
Get the default configuration for a usage.
#define AOM_USAGE_REALTIME
usage parameter analogous to AV1 REALTIME mode.
Definition: aom_encoder.h:1014
#define AOM_CODEC_USE_HIGHBITDEPTH
Definition: aom_encoder.h:80
#define AOM_CODEC_USE_PSNR
Initialization-time Feature Enabling.
Definition: aom_encoder.h:79
const aom_codec_cx_pkt_t * aom_codec_get_cx_data(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter)
Encoded data iterator.
@ AOM_CBR
Definition: aom_encoder.h:185
@ AOM_KF_AUTO
Definition: aom_encoder.h:200
@ AOM_CODEC_PSNR_PKT
Definition: aom_encoder.h:111
@ AOM_CODEC_CX_FRAME_PKT
Definition: aom_encoder.h:108
aom active region map
Definition: aomcx.h:1627
unsigned int rows
Definition: aomcx.h:1630
unsigned int cols
Definition: aomcx.h:1631
unsigned char * active_map
specify an on (1) or off (0) each 16x16 region within a frame
Definition: aomcx.h:1629
Codec context structure.
Definition: aom_codec.h:298
Encoder output packet.
Definition: aom_encoder.h:120
enum aom_codec_cx_pkt_kind kind
Definition: aom_encoder.h:121
double psnr[4]
Definition: aom_encoder.h:143
union aom_codec_cx_pkt::@1 data
struct aom_codec_cx_pkt::@1::@2 frame
Encoder configuration structure.
Definition: aom_encoder.h:385
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition: aom_encoder.h:473
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition: aom_encoder.h:538
struct aom_rational g_timebase
Stream timebase units.
Definition: aom_encoder.h:487
unsigned int g_usage
Algorithm specific "usage" value.
Definition: aom_encoder.h:397
unsigned int rc_buf_sz
Decoder Buffer Size.
Definition: aom_encoder.h:703
unsigned int g_h
Height of the frame.
Definition: aom_encoder.h:433
enum aom_kf_mode kf_mode
Keyframe placement mode.
Definition: aom_encoder.h:766
enum aom_rc_mode rc_end_usage
Rate control algorithm to use.
Definition: aom_encoder.h:621
unsigned int g_threads
Maximum number of threads to use.
Definition: aom_encoder.h:405
unsigned int kf_min_dist
Keyframe minimum interval.
Definition: aom_encoder.h:775
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition: aom_encoder.h:516
unsigned int rc_buf_initial_sz
Decoder Buffer Initial Size.
Definition: aom_encoder.h:712
unsigned int g_profile
Bitstream profile to use.
Definition: aom_encoder.h:415
aom_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition: aom_encoder.h:465
unsigned int g_w
Width of the frame.
Definition: aom_encoder.h:424
unsigned int rc_undershoot_pct
Rate control adaptation undershoot control.
Definition: aom_encoder.h:679
unsigned int kf_max_dist
Keyframe maximum interval.
Definition: aom_encoder.h:784
aom_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition: aom_encoder.h:495
unsigned int rc_max_quantizer
Maximum (Worst Quality) Quantizer.
Definition: aom_encoder.h:666
unsigned int rc_buf_optimal_sz
Decoder Buffer Optimal Size.
Definition: aom_encoder.h:721
unsigned int rc_min_quantizer
Minimum (Best Quality) Quantizer.
Definition: aom_encoder.h:656
unsigned int rc_target_bitrate
Target data rate.
Definition: aom_encoder.h:642
unsigned int rc_resize_mode
Mode for spatial resampling, if supported by the codec.
Definition: aom_encoder.h:547
unsigned int rc_overshoot_pct
Rate control adaptation overshoot control.
Definition: aom_encoder.h:688
Image Descriptor.
Definition: aom_image.h:182
aom_img_fmt_t fmt
Definition: aom_image.h:183
unsigned int d_w
Definition: aom_image.h:197
unsigned int d_h
Definition: aom_image.h:198
int num
Definition: aom_encoder.h:163
int den
Definition: aom_encoder.h:164
aom image scaling mode
Definition: aomcx.h:1639
Definition: aomcx.h:1698
int temporal_layer_id
Definition: aomcx.h:1700
int spatial_layer_id
Definition: aomcx.h:1699
Definition: aomcx.h:1709
int max_quantizers[32]
Definition: aomcx.h:1712
int number_spatial_layers
Definition: aomcx.h:1710
int layer_target_bitrate[32]
Definition: aomcx.h:1717
int framerate_factor[8]
Definition: aomcx.h:1719
int min_quantizers[32]
Definition: aomcx.h:1713
int scaling_factor_den[4]
Definition: aomcx.h:1715
int number_temporal_layers
Definition: aomcx.h:1711
int scaling_factor_num[4]
Definition: aomcx.h:1714
Definition: aomcx.h:1733
int use_comp_pred[3]
Definition: aomcx.h:1736
Definition: aomcx.h:1723
int reference[7]
Definition: aomcx.h:1726
int refresh[8]
Definition: aomcx.h:1729
int ref_idx[7]
Definition: aomcx.h:1728