Mercurial > libavcodec.hg
annotate x86/dsputil_mmx.h @ 12483:0159a19bfff7 libavcodec
aacdec: Rework channel mapping compatibility hacks.
For a PCE based configuration map the channels solely based on tags.
For an indexed configuration map the channels solely based on position.
This works with all known exotic samples including al17, elem_id0, bad_concat,
and lfe_is_sce.
author | alexc |
---|---|
date | Fri, 10 Sep 2010 18:01:48 +0000 |
parents | f4355cd85faa |
children |
rev | line source |
---|---|
8430 | 1 /* |
2 * MMX optimized DSP utils | |
3 * Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org> | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 #ifndef AVCODEC_X86_DSPUTIL_MMX_H | |
23 #define AVCODEC_X86_DSPUTIL_MMX_H | |
24 | |
25 #include <stdint.h> | |
26 #include "libavcodec/dsputil.h" | |
27 | |
28 typedef struct { uint64_t a, b; } xmm_reg; | |
29 | |
30 extern const uint64_t ff_bone; | |
31 extern const uint64_t ff_wtwo; | |
32 | |
33 extern const uint64_t ff_pdw_80000000[2]; | |
34 | |
35 extern const uint64_t ff_pw_3; | |
12143 | 36 extern const xmm_reg ff_pw_4; |
8430 | 37 extern const xmm_reg ff_pw_5; |
38 extern const xmm_reg ff_pw_8; | |
39 extern const uint64_t ff_pw_15; | |
40 extern const xmm_reg ff_pw_16; | |
12207
44d480f49053
Add header declarations for mmx/sse constants missing them
conrad
parents:
12168
diff
changeset
|
41 extern const xmm_reg ff_pw_18; |
8430 | 42 extern const uint64_t ff_pw_20; |
12207
44d480f49053
Add header declarations for mmx/sse constants missing them
conrad
parents:
12168
diff
changeset
|
43 extern const xmm_reg ff_pw_27; |
8430 | 44 extern const xmm_reg ff_pw_28; |
45 extern const xmm_reg ff_pw_32; | |
46 extern const uint64_t ff_pw_42; | |
12207
44d480f49053
Add header declarations for mmx/sse constants missing them
conrad
parents:
12168
diff
changeset
|
47 extern const uint64_t ff_pw_53; |
44d480f49053
Add header declarations for mmx/sse constants missing them
conrad
parents:
12168
diff
changeset
|
48 extern const xmm_reg ff_pw_63; |
8816
53f9f3994ec8
convert ff_pw_64 into an xmm_reg for future use in vp6 sse code
aurel
parents:
8590
diff
changeset
|
49 extern const xmm_reg ff_pw_64; |
8430 | 50 extern const uint64_t ff_pw_96; |
51 extern const uint64_t ff_pw_128; | |
52 extern const uint64_t ff_pw_255; | |
53 | |
12168
b246b214c2e9
VP8 H/V inner loopfilter MMX/MMXEXT/SSE2 optimizations.
rbultje
parents:
12143
diff
changeset
|
54 extern const xmm_reg ff_pb_1; |
11951
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
11485
diff
changeset
|
55 extern const xmm_reg ff_pb_3; |
8430 | 56 extern const uint64_t ff_pb_7; |
57 extern const uint64_t ff_pb_1F; | |
58 extern const uint64_t ff_pb_3F; | |
59 extern const uint64_t ff_pb_81; | |
12454
f4355cd85faa
Port latest x264 deblock asm (before they moved to using NV12 as internal
rbultje
parents:
12450
diff
changeset
|
60 extern const xmm_reg ff_pb_A1; |
12207
44d480f49053
Add header declarations for mmx/sse constants missing them
conrad
parents:
12168
diff
changeset
|
61 extern const xmm_reg ff_pb_F8; |
8430 | 62 extern const uint64_t ff_pb_FC; |
12207
44d480f49053
Add header declarations for mmx/sse constants missing them
conrad
parents:
12168
diff
changeset
|
63 extern const xmm_reg ff_pb_FE; |
8430 | 64 |
65 extern const double ff_pd_1[2]; | |
66 extern const double ff_pd_2[2]; | |
67 | |
68 #define LOAD4(stride,in,a,b,c,d)\ | |
69 "movq 0*"#stride"+"#in", "#a"\n\t"\ | |
70 "movq 1*"#stride"+"#in", "#b"\n\t"\ | |
71 "movq 2*"#stride"+"#in", "#c"\n\t"\ | |
72 "movq 3*"#stride"+"#in", "#d"\n\t" | |
73 | |
74 #define STORE4(stride,out,a,b,c,d)\ | |
75 "movq "#a", 0*"#stride"+"#out"\n\t"\ | |
76 "movq "#b", 1*"#stride"+"#out"\n\t"\ | |
77 "movq "#c", 2*"#stride"+"#out"\n\t"\ | |
78 "movq "#d", 3*"#stride"+"#out"\n\t" | |
79 | |
80 /* in/out: mma=mma+mmb, mmb=mmb-mma (the difference uses the original mma, not the new sum) */ | |
81 #define SUMSUB_BA( a, b ) \ | |
82 "paddw "#b", "#a" \n\t"\ | |
83 "paddw "#b", "#b" \n\t"\ | |
84 "psubw "#a", "#b" \n\t" | |
85 | |
86 #define SBUTTERFLY(a,b,t,n,m)\ | |
87 "mov" #m " " #a ", " #t " \n\t" /* abcd */\ | |
88 "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ | |
89 "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ | |
90 | |
91 #define TRANSPOSE4(a,b,c,d,t)\ | |
92 SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\ | |
93 SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\ | |
94 SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\ | |
95 SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */ | |
96 | |
12450
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
97 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
98 __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
99 "movd %4, %%mm0 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
100 "movd %5, %%mm1 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
101 "movd %6, %%mm2 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
102 "movd %7, %%mm3 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
103 "punpcklbw %%mm1, %%mm0 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
104 "punpcklbw %%mm3, %%mm2 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
105 "movq %%mm0, %%mm1 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
106 "punpcklwd %%mm2, %%mm0 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
107 "punpckhwd %%mm2, %%mm1 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
108 "movd %%mm0, %0 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
109 "punpckhdq %%mm0, %%mm0 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
110 "movd %%mm0, %1 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
111 "movd %%mm1, %2 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
112 "punpckhdq %%mm1, %%mm1 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
113 "movd %%mm1, %3 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
114 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
115 : "=m" (*(uint32_t*)(dst + 0*dst_stride)), |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
116 "=m" (*(uint32_t*)(dst + 1*dst_stride)), |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
117 "=m" (*(uint32_t*)(dst + 2*dst_stride)), |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
118 "=m" (*(uint32_t*)(dst + 3*dst_stride)) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
119 : "m" (*(uint32_t*)(src + 0*src_stride)), |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
120 "m" (*(uint32_t*)(src + 1*src_stride)), |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
121 "m" (*(uint32_t*)(src + 2*src_stride)), |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
122 "m" (*(uint32_t*)(src + 3*src_stride)) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
123 ); |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
124 } |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
12435
diff
changeset
|
125 |
8430 | 126 // e,f,g,h can be memory |
127 // out: a,d,t,c | |
128 #define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\ | |
129 "punpcklbw " #e ", " #a " \n\t" /* a0 e0 a1 e1 a2 e2 a3 e3 */\ | |
130 "punpcklbw " #f ", " #b " \n\t" /* b0 f0 b1 f1 b2 f2 b3 f3 */\ | |
131 "punpcklbw " #g ", " #c " \n\t" /* c0 g0 c1 g1 c2 g2 c3 g3 */\ | |
132 "punpcklbw " #h ", " #d " \n\t" /* d0 h0 d1 h1 d2 h2 d3 h3 */\ | |
133 SBUTTERFLY(a, b, t, bw, q) /* a= a0 b0 e0 f0 a1 b1 e1 f1 */\ | |
134 /* t= a2 b2 e2 f2 a3 b3 e3 f3 */\ | |
135 SBUTTERFLY(c, d, b, bw, q) /* c= c0 d0 g0 h0 c1 d1 g1 h1 */\ | |
136 /* b= c2 d2 g2 h2 c3 d3 g3 h3 */\ | |
137 SBUTTERFLY(a, c, d, wd, q) /* a= a0 b0 c0 d0 e0 f0 g0 h0 */\ | |
138 /* d= a1 b1 c1 d1 e1 f1 g1 h1 */\ | |
139 SBUTTERFLY(t, b, c, wd, q) /* t= a2 b2 c2 d2 e2 f2 g2 h2 */\ | |
140 /* c= a3 b3 c3 d3 e3 f3 g3 h3 */ | |
141 | |
8590 | 142 #if ARCH_X86_64 |
8430 | 143 // permutes 01234567 -> 05736421 |
144 #define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\ | |
145 SBUTTERFLY(a,b,%%xmm8,wd,dqa)\ | |
146 SBUTTERFLY(c,d,b,wd,dqa)\ | |
147 SBUTTERFLY(e,f,d,wd,dqa)\ | |
148 SBUTTERFLY(g,h,f,wd,dqa)\ | |
149 SBUTTERFLY(a,c,h,dq,dqa)\ | |
150 SBUTTERFLY(%%xmm8,b,c,dq,dqa)\ | |
151 SBUTTERFLY(e,g,b,dq,dqa)\ | |
152 SBUTTERFLY(d,f,g,dq,dqa)\ | |
153 SBUTTERFLY(a,e,f,qdq,dqa)\ | |
154 SBUTTERFLY(%%xmm8,d,e,qdq,dqa)\ | |
155 SBUTTERFLY(h,b,d,qdq,dqa)\ | |
156 SBUTTERFLY(c,g,b,qdq,dqa)\ | |
157 "movdqa %%xmm8, "#g" \n\t" | |
158 #else | |
159 #define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\ | |
160 "movdqa "#h", "#t" \n\t"\ | |
161 SBUTTERFLY(a,b,h,wd,dqa)\ | |
162 "movdqa "#h", 16"#t" \n\t"\ | |
163 "movdqa "#t", "#h" \n\t"\ | |
164 SBUTTERFLY(c,d,b,wd,dqa)\ | |
165 SBUTTERFLY(e,f,d,wd,dqa)\ | |
166 SBUTTERFLY(g,h,f,wd,dqa)\ | |
167 SBUTTERFLY(a,c,h,dq,dqa)\ | |
168 "movdqa "#h", "#t" \n\t"\ | |
169 "movdqa 16"#t", "#h" \n\t"\ | |
170 SBUTTERFLY(h,b,c,dq,dqa)\ | |
171 SBUTTERFLY(e,g,b,dq,dqa)\ | |
172 SBUTTERFLY(d,f,g,dq,dqa)\ | |
173 SBUTTERFLY(a,e,f,qdq,dqa)\ | |
174 SBUTTERFLY(h,d,e,qdq,dqa)\ | |
175 "movdqa "#h", 16"#t" \n\t"\ | |
176 "movdqa "#t", "#h" \n\t"\ | |
177 SBUTTERFLY(h,b,d,qdq,dqa)\ | |
178 SBUTTERFLY(c,g,b,qdq,dqa)\ | |
179 "movdqa 16"#t", "#g" \n\t" | |
180 #endif | |
181 | |
182 #define MOVQ_WONE(regd) \ | |
183 __asm__ volatile ( \ | |
184 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ | |
185 "psrlw $15, %%" #regd ::) | |
186 | |
187 void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx); | |
10114
8b9fc0c8f1cc
Move declarations of some mmx functions to dsputil_mmx.h
mru
parents:
8816
diff
changeset
|
188 void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx); |
8b9fc0c8f1cc
Move declarations of some mmx functions to dsputil_mmx.h
mru
parents:
8816
diff
changeset
|
189 |
12435
fe78a4548d12
Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents:
12356
diff
changeset
|
190 void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); |
fe78a4548d12
Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents:
12356
diff
changeset
|
191 void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); |
fe78a4548d12
Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents:
12356
diff
changeset
|
192 void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); |
8430 | 193 |
11381 | 194 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); |
195 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); | |
196 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); | |
197 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); | |
198 | |
199 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx); | |
200 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); | |
201 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd); | |
202 | |
203 void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag, | |
204 double *autoc); | |
205 | |
206 void ff_mmx_idct(DCTELEM *block); | |
207 void ff_mmxext_idct(DCTELEM *block); | |
208 | |
12316 | 209 |
210 void ff_deinterlace_line_mmx(uint8_t *dst, | |
211 const uint8_t *lum_m4, const uint8_t *lum_m3, | |
212 const uint8_t *lum_m2, const uint8_t *lum_m1, | |
213 const uint8_t *lum, | |
214 int size); | |
215 | |
216 void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4, | |
217 const uint8_t *lum_m3, | |
218 const uint8_t *lum_m2, | |
219 const uint8_t *lum_m1, | |
220 const uint8_t *lum, int size); | |
221 | |
8430 | 222 #endif /* AVCODEC_X86_DSPUTIL_MMX_H */ |