FFmpeg  4.4.5
dct.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
#include <stddef.h>

#include "config.h"

#include "libavutil/mem_internal.h"

#include "libavcodec/x86/fdct.h"
26 
#if (CONFIG_PRORES_DECODER || CONFIG_PRORES_LGPL_DECODER) && ARCH_X86_64 && HAVE_X86ASM
/*
 * External ProRes 10-bit IDCT "put" routines exercised by the wrappers below.
 * The prototypes use ptrdiff_t for linesize and a const qmat so that they
 * agree with the way these symbols are declared in the rest of the project;
 * declaring linesize as int would describe a different argument width on
 * x86-64 than the one the routine is declared with.
 */
void ff_prores_idct_put_10_sse2(uint16_t *dst, ptrdiff_t linesize,
                                int16_t *block, const int16_t *qmat);

/**
 * Define ff_prores_idct_put_10_<INSN>_wrap(), an adapter that lets a
 * four-argument ProRes IDCT routine be called with a single int16_t pointer
 * to a 64-entry block that is transformed in place.
 *
 * The generated function:
 *  - copies the 64 input coefficients from dst into an aligned scratch
 *    block and fills an aligned 64-entry qmat with the constant 4;
 *  - calls ff_prores_idct_put_10_<INSN>() with dst as the output buffer and
 *    a linesize of 16 (presumably bytes, i.e. 8 samples of 2 bytes per row
 *    -- TODO confirm against the routine);
 *  - subtracts 512 from each of the 64 output entries (presumably undoing
 *    a mid-level bias added by the "put" routine -- TODO confirm).
 *
 * dst is handed to the routine through an explicit uint16_t * cast: the
 * conversion from int16_t * is not implicit in C, and the signed and
 * unsigned variants of the same integer type may alias each other.
 *
 * @param INSN instruction-set suffix of the routine to wrap (e.g. sse2, avx)
 */
#define PR_WRAP(INSN) \
static void ff_prores_idct_put_10_##INSN##_wrap(int16_t *dst){ \
    LOCAL_ALIGNED(16, int16_t, qmat, [64]); \
    LOCAL_ALIGNED(16, int16_t, tmp, [64]); \
    int i; \
 \
    for(i=0; i<64; i++){ \
        qmat[i]=4; \
        tmp[i]= dst[i]; \
    } \
    ff_prores_idct_put_10_##INSN ((uint16_t *)dst, 16, tmp, qmat); \
 \
    for(i=0; i<64; i++) { \
        dst[i] -= 512; \
    } \
}

PR_WRAP(sse2)

# if HAVE_AVX_EXTERNAL
void ff_prores_idct_put_10_avx(uint16_t *dst, ptrdiff_t linesize,
                               int16_t *block, const int16_t *qmat);
PR_WRAP(avx)
# endif

#endif
57 
/*
 * Table of x86-specific entries of type struct algo (presumably the
 * forward-DCT implementations, going by the name), closed by an all-zero
 * sentinel entry.
 *
 * NOTE(review): every #if/#endif pair below is empty in this listing, and the
 * embedded listing numbers skip one value inside each pair (59 -> 61,
 * 62 -> 64, 65 -> 67). The guarded initializer rows therefore appear to have
 * been lost when the listing was produced -- TODO restore them from the
 * upstream file before relying on this table.
 */
58 static const struct algo fdct_tab_arch[] = {
59 #if HAVE_MMX_INLINE
61 #endif
62 #if HAVE_MMXEXT_INLINE
64 #endif
65 #if HAVE_SSE2_INLINE
67 #endif
68  { 0 }
69 };
70 
/*
 * Table of x86-specific entries of type struct algo (presumably the
 * inverse-DCT implementations, going by the name), closed by an all-zero
 * sentinel entry.
 *
 * NOTE(review): apart from the two ProRes rows, every guarded section below
 * is empty in this listing and the embedded listing numbers skip values
 * inside them (72 -> 74, 76 -> 79, 80 -> 82, 92 -> 96, 97 -> 101). The
 * corresponding initializer rows appear to have been lost when the listing
 * was produced -- TODO restore them from the upstream file.
 */
71 static const struct algo idct_tab_arch[] = {
72 #if HAVE_MMX_EXTERNAL
74 #endif
75 #if CONFIG_MPEG4_DECODER && HAVE_X86ASM
76 #if ARCH_X86_32
79 #endif
80 #if HAVE_SSE2_EXTERNAL
82 #endif
83 #endif /* CONFIG_MPEG4_DECODER && HAVE_X86ASM */
84 #if (CONFIG_PRORES_DECODER || CONFIG_PRORES_LGPL_DECODER) && ARCH_X86_64 && HAVE_X86ASM
/*
 * The two rows below register the wrappers generated by PR_WRAP(sse2) and
 * PR_WRAP(avx), each with FF_IDCT_PERM_TRANSPOSE and the matching CPU flag.
 * The meaning of the trailing 1 is defined by struct algo, which is not
 * visible here -- TODO confirm.
 */
85  { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 },
86 # if HAVE_AVX_EXTERNAL
87  { "PR-AVX", ff_prores_idct_put_10_avx_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX, 1 },
88 # endif
89 #endif
90 #if HAVE_X86ASM
91 #if ARCH_X86_64
92 #if HAVE_SSE2_EXTERNAL
96 #endif
97 #if HAVE_AVX_EXTERNAL
101 #endif
102 #endif
103 #endif
104  { 0 }
105 };
106 
/*
 * Scatter table used by permute_x86() for FF_IDCT_PERM_SIMPLE: entry i is the
 * destination index that receives source coefficient i
 * (dst[idct_simple_mmx_perm[i]] = src[i]). The 64 entries contain every value
 * in 0x00..0x3F exactly once, so the mapping is a permutation of the block.
 */
static const uint8_t idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
117 
/*
 * Column reordering used by permute_x86() for FF_IDCT_PERM_SSE2: within each
 * group of eight coefficients, source position (i & 7) is stored at position
 * idct_sse2_row_perm[i & 7] of the same group. The eight entries contain
 * every value in 0..7 exactly once.
 */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
119 
120 static int permute_x86(int16_t dst[64], const int16_t src[64],
122 {
123  int i;
124 
125  switch (perm_type) {
126  case FF_IDCT_PERM_SIMPLE:
127  for (i = 0; i < 64; i++)
128  dst[idct_simple_mmx_perm[i]] = src[i];
129  return 1;
130  case FF_IDCT_PERM_SSE2:
131  for (i = 0; i < 64; i++)
132  dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
133  return 1;
134  }
135 
136  return 0;
137 }
uint8_t
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
Definition: cpu.h:36
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
Definition: cpu.h:32
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:31
#define AV_CPU_FLAG_AVX
AVX functions: requires OS support even if YMM registers aren't used.
Definition: cpu.h:49
idct_permutation_type
Definition: idctdsp.h:37
@ FF_IDCT_PERM_SIMPLE
Definition: idctdsp.h:40
@ FF_IDCT_PERM_NONE
Definition: idctdsp.h:38
@ FF_IDCT_PERM_TRANSPOSE
Definition: idctdsp.h:41
@ FF_IDCT_PERM_SSE2
Definition: idctdsp.h:43
int i
Definition: input.c:407
void ff_prores_idct_put_10_sse2(uint16_t *dst, ptrdiff_t linesize, int16_t *block, const int16_t *qmat)
void ff_prores_idct_put_10_avx(uint16_t *dst, ptrdiff_t linesize, int16_t *block, const int16_t *qmat)
Definition: dct.c:53
enum idct_permutation_type perm_type
Definition: dct.c:56
#define src
Definition: vp8dsp.c:255
static int16_t block[64]
Definition: dct.c:116
static const uint8_t idct_simple_mmx_perm[64]
Definition: dct.c:107
static const struct algo fdct_tab_arch[]
Definition: dct.c:58
static const uint8_t idct_sse2_row_perm[8]
Definition: dct.c:118
static int permute_x86(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
Definition: dct.c:120
static const struct algo idct_tab_arch[]
Definition: dct.c:71
void ff_fdct_sse2(int16_t *block)
void ff_fdct_mmxext(int16_t *block)
void ff_fdct_mmx(int16_t *block)
void ff_simple_idct8_avx(int16_t *block)
void ff_simple_idct10_avx(int16_t *block)
void ff_simple_idct12_avx(int16_t *block)
void ff_simple_idct10_sse2(int16_t *block)
void ff_simple_idct_mmx(int16_t *block)
void ff_simple_idct8_sse2(int16_t *block)
void ff_simple_idct12_sse2(int16_t *block)
header for Xvid IDCT functions
void ff_xvid_idct_mmxext(short *block)
void ff_xvid_idct_mmx(short *block)
void ff_xvid_idct_sse2(short *block)