Mercurial > libavcodec.hg
annotate x86/idct_sse2_xvid.c @ 12483:0159a19bfff7 libavcodec
aacdec: Rework channel mapping compatibility hacks.
For a PCE based configuration map the channels solely based on tags.
For an indexed configuration map the channels solely based on position.
This works with all known exotic samples including al17, elem_id0, bad_concat,
and lfe_is_sce.
author | alexc |
---|---|
date | Fri, 10 Sep 2010 18:01:48 +0000 |
parents | fe78a4548d12 |
children |
rev | line source |
---|---|
8430 | 1 /* |
2 * XVID MPEG-4 VIDEO CODEC | |
3 * - SSE2 inverse discrete cosine transform - | |
4 * | |
5 * Copyright(C) 2003 Pascal Massimino <skal@planet-d.net> | |
6 * | |
7 * Conversion to gcc syntax with modifications | |
8 * by Alexander Strange <astrange@ithinksw.com> | |
9 * | |
10 * Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid. | |
11 * | |
12 * This file is part of FFmpeg. | |
13 * | |
14 * Vertical pass is an implementation of the scheme: | |
15 * Loeffler C., Ligtenberg A., and Moschytz G.S.: | |
16 * Practical Fast 1-D DCT Algorithms with 11 Multiplications, | |
17 * Proc. ICASSP 1989, 988-991. | |
18 * | |
19 * Horizontal pass is a double 4x4 vector/matrix multiplication, | |
20 * (see also Intel's Application Note 922: | |
21 * http://developer.intel.com/vtune/cbts/strmsimd/922down.htm | |
22 * Copyright (C) 1999 Intel Corporation) | |
23 * | |
24 * More details at http://skal.planet-d.net/coding/dct.html | |
25 * | |
26 * FFmpeg is free software; you can redistribute it and/or | |
27 * modify it under the terms of the GNU Lesser General Public | |
28 * License as published by the Free Software Foundation; either | |
29 * version 2.1 of the License, or (at your option) any later version. | |
30 * | |
31 * FFmpeg is distributed in the hope that it will be useful, | |
32 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
33 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
34 * Lesser General Public License for more details. | |
35 * | |
36 * You should have received a copy of the GNU Lesser General Public License | |
37 * along with FFmpeg; if not, write to the Free Software Foundation, | |
38 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
39 */ | |
40 | |
41 #include "libavcodec/dsputil.h" | |
42 #include "idct_xvid.h" | |
10114
8b9fc0c8f1cc
Move declarations of some mmx functions to dsputil_mmx.h
mru
parents:
8718
diff
changeset
|
43 #include "dsputil_mmx.h" |
8430 | 44 |
45 /*! | |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
10961
diff
changeset
|
46 * @file |
8430 | 47 * @brief SSE2 idct compatible with xvidmmx |
48 */ | |
49 | |
/* X8(x) expands to x repeated eight times, comma-separated. It is used below
 * to initialise the eight-element constant vectors (tan1 ... m127). */
50 #define X8(x) x,x,x,x,x,x,x,x | |
51 | |
/* Shift amounts of the row and column passes. Neither name is referenced in
 * the code visible in this file: iMTX_MULT uses the literal "psrad $11" and
 * iLLM_PASS / iLLM_PASS_SPARSE use the literal "psraw $6". */
52 #define ROW_SHIFT 11 | |
53 #define COL_SHIFT 6 | |
54 | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10114
diff
changeset
|
/*
 * Multipliers for the column pass, each replicated across eight 16-bit lanes.
 * The values are the quantities named in the trailing comments scaled by
 * 65536 (e.g. 13036/65536 ~= tan(pi/16)). 43790 does not fit in int16_t and
 * presumably wraps to 43790-65536, which is what the "-1" in the tan3 comment
 * refers to. tan1 and tan3 are loaded by iLLM_HEAD; tan2 and sqrt2 are loaded
 * inside iLLM_PASS / iLLM_PASS_SPARSE. All four are applied with pmulhw.
 */
55 DECLARE_ASM_CONST(16, int16_t, tan1)[] = {X8(13036)}; // tan( pi/16) |
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10114
diff
changeset
|
56 DECLARE_ASM_CONST(16, int16_t, tan2)[] = {X8(27146)}; // tan(2pi/16) = sqrt(2)-1 |
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10114
diff
changeset
|
57 DECLARE_ASM_CONST(16, int16_t, tan3)[] = {X8(43790)}; // tan(3pi/16)-1 |
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10114
diff
changeset
|
58 DECLARE_ASM_CONST(16, int16_t, sqrt2)[]= {X8(23170)}; // 0.5/sqrt(2) |
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10114
diff
changeset
|
/*
 * Eight bytes of 127, loaded into %mm0 at the start of ff_idct_xvid_sse2.
 * TEST_ONE_ROW / TEST_TWO_ROWS add it with unsigned saturation (paddusb) to
 * the OR of a row's two 8-byte halves and collect the byte sign bits with
 * pmovmskb, so the resulting mask is non-zero exactly when the row contains
 * a non-zero byte.
 */
59 DECLARE_ASM_CONST(8, uint8_t, m127)[] = {X8(127)}; |
8430 | 60 |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10114
diff
changeset
|
/*
 * Row-pass coefficient table: 32 16-bit words that iMTX_MULT reads as four
 * 16-byte pmaddwd operands (byte offsets 0, 16, 32 and 48).
 * ff_idct_xvid_sse2 uses this table for rows 0 and 4.
 */
61 DECLARE_ASM_CONST(16, int16_t, iTab1)[] = { |
8430 | 62 0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d, |
63 0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61, | |
64 0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7, | |
65 0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b | |
66 }; | |
67 | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10114
diff
changeset
|
/*
 * Row-pass coefficient table: 32 16-bit words that iMTX_MULT reads as four
 * 16-byte pmaddwd operands (byte offsets 0, 16, 32 and 48).
 * ff_idct_xvid_sse2 uses this table for rows 1 and 7.
 */
68 DECLARE_ASM_CONST(16, int16_t, iTab2)[] = { |
8430 | 69 0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5, |
70 0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04, | |
71 0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41, | |
72 0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df | |
73 }; | |
74 | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10114
diff
changeset
|
/*
 * Row-pass coefficient table: 32 16-bit words that iMTX_MULT reads as four
 * 16-byte pmaddwd operands (byte offsets 0, 16, 32 and 48).
 * ff_idct_xvid_sse2 uses this table for rows 2 and 6.
 */
75 DECLARE_ASM_CONST(16, int16_t, iTab3)[] = { |
8430 | 76 0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf, |
77 0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf, | |
78 0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d, | |
79 0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04 | |
80 }; | |
81 | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10114
diff
changeset
|
/*
 * Row-pass coefficient table: 32 16-bit words that iMTX_MULT reads as four
 * 16-byte pmaddwd operands (byte offsets 0, 16, 32 and 48).
 * ff_idct_xvid_sse2 uses this table for rows 3 and 5.
 */
82 DECLARE_ASM_CONST(16, int16_t, iTab4)[] = { |
8430 | 83 0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746, |
84 0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac, | |
85 0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df, | |
86 0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e | |
87 }; | |
88 | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10114
diff
changeset
|
/*
 * Rounding terms for the row pass: six groups of four identical 32-bit
 * values, one 16-byte group per paddd operand. ff_idct_xvid_sse2 selects a
 * group by byte offset: group 0 for row 0, group 1 for row 1, group 2 for
 * row 2, group 3 for row 3, group 4 for row 5, and group 5 for both row 6
 * and row 7. Row 4 is processed without a rounder (its rounder argument
 * is "#").
 */
89 DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders)[] = { |
8430 | 90 65536, 65536, 65536, 65536, |
91 3597, 3597, 3597, 3597, | |
92 2260, 2260, 2260, 2260, | |
93 1203, 1203, 1203, 1203, | |
94 120, 120, 120, 120, | |
95 512, 512, 512, 512 | |
96 }; | |
97 | |
/*
 * Operand names used by the asm below. ROWn names the place where the row
 * pass leaves output row n (through PUT_ODD / PUT_EVEN) for the column pass.
 * The odd rows are kept in xmm registers on both architectures.
 */
98 // Temporary storage before the column pass | |
99 #define ROW1 "%%xmm6" | |
100 #define ROW3 "%%xmm4" | |
101 #define ROW5 "%%xmm5" | |
102 #define ROW7 "%%xmm7" | |
103 | |
/* CLEAR_ODD zeroes a row register (pxor r,r). PUT_ODD copies the row result
 * from %xmm2 into dst with its upper four 16-bit words in reversed order
 * (pshufhw $0x1B). */
104 #define CLEAR_ODD(r) "pxor "r","r" \n\t" | |
105 #define PUT_ODD(dst) "pshufhw $0x1B, %%xmm2, "dst" \n\t" | |
106 | |
8590 | 107 #if ARCH_X86_64 |
8430 | 108 |
/* x86-64: the even rows also stay in registers (xmm8-xmm11), so REGn is the
 * same operand as ROWn and CLEAR_EVEN / PUT_EVEN are the odd-row versions.
 * XMMS is a scratch register and TAN3 / TAN1 have registers of their own.
 * MOV_32_ONLY is "#", which turns every line built from it into assembler
 * text starting with '#' (presumably the assembler's comment character, so
 * those lines emit no instruction). */
109 # define ROW0 "%%xmm8" | |
110 # define REG0 ROW0 | |
111 # define ROW2 "%%xmm9" | |
112 # define REG2 ROW2 | |
113 # define ROW4 "%%xmm10" | |
114 # define REG4 ROW4 | |
115 # define ROW6 "%%xmm11" | |
116 # define REG6 ROW6 | |
117 # define CLEAR_EVEN(r) CLEAR_ODD(r) | |
118 # define PUT_EVEN(dst) PUT_ODD(dst) | |
119 # define XMMS "%%xmm12" | |
120 # define MOV_32_ONLY "#" | |
121 # define SREG2 REG2 | |
122 # define TAN3 "%%xmm13" | |
123 # define TAN1 "%%xmm14" | |
124 | |
125 #else | |
126 | |
/* Other (32-bit) builds: the even rows are written back to memory at
 * n*16(%0) by PUT_EVEN, and REGn names the xmm register they are reloaded
 * into with MOV_32_ONLY (movdqa) in the column pass. REG0/REG2 share %xmm4
 * and REG4/REG6 share %xmm6. CLEAR_EVEN expands to nothing. XMMS and TAN1
 * are both %xmm2, and TAN3 is %xmm0, which iMTX_MULT also writes. */
127 # define ROW0 "(%0)" | |
128 # define REG0 "%%xmm4" | |
129 # define ROW2 "2*16(%0)" | |
130 # define REG2 "%%xmm4" | |
131 # define ROW4 "4*16(%0)" | |
132 # define REG4 "%%xmm6" | |
133 # define ROW6 "6*16(%0)" | |
134 # define REG6 "%%xmm6" | |
135 # define CLEAR_EVEN(r) | |
136 # define PUT_EVEN(dst) \ | |
137 "pshufhw $0x1B, %%xmm2, %%xmm2 \n\t" \ | |
138 "movdqa %%xmm2, "dst" \n\t" | |
139 # define XMMS "%%xmm2" | |
140 # define MOV_32_ONLY "movdqa " | |
141 # define SREG2 "%%xmm7" | |
142 # define TAN3 "%%xmm0" | |
143 # define TAN1 "%%xmm2" | |
144 | |
145 #endif | |
146 | |
/* ROUND(x) produces the beginning of a "paddd <mangled x>" instruction.
 * iMTX_MULT completes it by appending ", %%xmm0". */
147 #define ROUND(x) "paddd "MANGLE(x) | |
148 | |
/* JZ(reg, to): test a 32-bit register against itself and jump to label
 * `to` when it is zero. */
149 #define JZ(reg, to) \ | |
150 "testl "reg","reg" \n\t" \ | |
151 "jz "to" \n\t" | |
152 | |
/* JNZ(reg, to): test a 32-bit register against itself and jump to label
 * `to` when it is non-zero. */
153 #define JNZ(reg, to) \ | |
154 "testl "reg","reg" \n\t" \ | |
155 "jnz "to" \n\t" | |
156 | |
/*
 * TEST_ONE_ROW(src, reg, clear): emit `clear` first, then OR the two 8-byte
 * halves of the 16-byte row at memory operand `src` into %mm1, add %mm0 with
 * unsigned byte saturation and move the byte sign-bit mask into `reg`.
 * ff_idct_xvid_sse2 loads m127 into %mm0 before using this macro.
 * Writes %mm1 and `reg`.
 */
157 #define TEST_ONE_ROW(src, reg, clear) \ | |
158 clear \ | |
159 "movq "src", %%mm1 \n\t" \ | |
160 "por 8+"src", %%mm1 \n\t" \ | |
161 "paddusb %%mm0, %%mm1 \n\t" \ | |
162 "pmovmskb %%mm1, "reg" \n\t" | |
163 | |
/*
 * TEST_TWO_ROWS: the same test as TEST_ONE_ROW applied to two rows at once.
 * `clear1` and `clear2` are emitted first; row1's mask goes to reg1 (through
 * %mm1) and row2's mask goes to reg2 (through %mm2).
 */
164 #define TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2) \ | |
165 clear1 \ | |
166 clear2 \ | |
167 "movq "row1", %%mm1 \n\t" \ | |
168 "por 8+"row1", %%mm1 \n\t" \ | |
169 "movq "row2", %%mm2 \n\t" \ | |
170 "por 8+"row2", %%mm2 \n\t" \ | |
171 "paddusb %%mm0, %%mm1 \n\t" \ | |
172 "paddusb %%mm0, %%mm2 \n\t" \ | |
173 "pmovmskb %%mm1, "reg1" \n\t" \ | |
174 "pmovmskb %%mm2, "reg2" \n\t" | |
175 | |
/*
 * iMTX_MULT(src, table, rounder, put): transform one 16-byte row.
 *   src     - memory operand of the input row (loaded with movdqa)
 *   table   - coefficient table, read at byte offsets 0, 16, 32 and 48
 *   rounder - text completed with ", %%xmm0": either ROUND(...) to add a
 *             rounding term, or "#" so that the line adds nothing
 *   put     - instruction(s) that store the result held in %xmm2
 *             (PUT_ODD / PUT_EVEN)
 * %xmm0 accumulates the two products of the shuffles annotated 0246 / 4602
 * plus the rounder, and %xmm2 the two products annotated 5713 / 1357. Their
 * sum and difference are each shifted right arithmetically by 11 and packed
 * with signed saturation into %xmm2 (sum in the low half, difference in the
 * high half).
 * Writes %xmm0-%xmm3. The expansion ends with the local label "1:", which
 * lets a preceding JZ(..., "1f") skip the whole row.
 */
176 ///IDCT pass on rows. | |
177 #define iMTX_MULT(src, table, rounder, put) \ | |
178 "movdqa "src", %%xmm3 \n\t" \ | |
179 "movdqa %%xmm3, %%xmm0 \n\t" \ | |
180 "pshufd $0x11, %%xmm3, %%xmm1 \n\t" /* 4602 */ \ | |
181 "punpcklqdq %%xmm0, %%xmm0 \n\t" /* 0246 */ \ | |
182 "pmaddwd "table", %%xmm0 \n\t" \ | |
183 "pmaddwd 16+"table", %%xmm1 \n\t" \ | |
184 "pshufd $0xBB, %%xmm3, %%xmm2 \n\t" /* 5713 */ \ | |
185 "punpckhqdq %%xmm3, %%xmm3 \n\t" /* 1357 */ \ | |
186 "pmaddwd 32+"table", %%xmm2 \n\t" \ | |
187 "pmaddwd 48+"table", %%xmm3 \n\t" \ | |
188 "paddd %%xmm1, %%xmm0 \n\t" \ | |
189 "paddd %%xmm3, %%xmm2 \n\t" \ | |
190 rounder", %%xmm0 \n\t" \ | |
191 "movdqa %%xmm2, %%xmm3 \n\t" \ | |
192 "paddd %%xmm0, %%xmm2 \n\t" \ | |
193 "psubd %%xmm3, %%xmm0 \n\t" \ | |
194 "psrad $11, %%xmm2 \n\t" \ | |
195 "psrad $11, %%xmm0 \n\t" \ | |
196 "packssdw %%xmm0, %%xmm2 \n\t" \ | |
197 put \ | |
198 "1: \n\t" | |
199 | |
/* iLLM_HEAD loads the tan3 and tan1 constant vectors into the TAN3 and TAN1
 * registers, where iLLM_PASS / iLLM_PASS_SPARSE expect them. */
200 #define iLLM_HEAD \ | |
201 "movdqa "MANGLE(tan3)", "TAN3" \n\t" \ | |
202 "movdqa "MANGLE(tan1)", "TAN1" \n\t" \ | |
203 | |
/*
 * iLLM_PASS(dct): column pass over all eight rows.
 * Inputs: row 1 in %xmm6, row 3 in %xmm4, row 5 in %xmm5, row 7 in %xmm7
 * (the ROW1/ROW3/ROW5/ROW7 registers), the even rows in ROW0/ROW2/ROW4/ROW6,
 * and tan3 / tan1 already loaded into TAN3 / TAN1 (see iLLM_HEAD).
 * Output: eight 16-byte rows stored with movdqa at 0*16 ... 7*16 from the
 * base register `dct`, each shifted right arithmetically by 6 first.
 * Where MOV_32_ONLY is a real movdqa, the even rows are reloaded from memory
 * into REGn, and TAN1 is spilled to (%0) and reloaded around the section
 * that uses XMMS (which is the same register as TAN1 in that configuration).
 * Row 0 has already been loaded into REG0 when the spill overwrites (%0).
 * Note that the spill address is the literal (%0), not `dct`.
 */
204 ///IDCT pass on columns. | |
205 #define iLLM_PASS(dct) \ | |
206 "movdqa "TAN3", %%xmm1 \n\t" \ | |
207 "movdqa "TAN1", %%xmm3 \n\t" \ | |
208 "pmulhw %%xmm4, "TAN3" \n\t" \ | |
209 "pmulhw %%xmm5, %%xmm1 \n\t" \ | |
210 "paddsw %%xmm4, "TAN3" \n\t" \ | |
211 "paddsw %%xmm5, %%xmm1 \n\t" \ | |
212 "psubsw %%xmm5, "TAN3" \n\t" \ | |
213 "paddsw %%xmm4, %%xmm1 \n\t" \ | |
214 "pmulhw %%xmm7, %%xmm3 \n\t" \ | |
215 "pmulhw %%xmm6, "TAN1" \n\t" \ | |
216 "paddsw %%xmm6, %%xmm3 \n\t" \ | |
217 "psubsw %%xmm7, "TAN1" \n\t" \ | |
218 "movdqa %%xmm3, %%xmm7 \n\t" \ | |
219 "movdqa "TAN1", %%xmm6 \n\t" \ | |
220 "psubsw %%xmm1, %%xmm3 \n\t" \ | |
221 "psubsw "TAN3", "TAN1" \n\t" \ | |
222 "paddsw %%xmm7, %%xmm1 \n\t" \ | |
223 "paddsw %%xmm6, "TAN3" \n\t" \ | |
224 "movdqa %%xmm3, %%xmm6 \n\t" \ | |
225 "psubsw "TAN3", %%xmm3 \n\t" \ | |
226 "paddsw %%xmm6, "TAN3" \n\t" \ | |
227 "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \ | |
228 "pmulhw %%xmm4, %%xmm3 \n\t" \ | |
229 "pmulhw %%xmm4, "TAN3" \n\t" \ | |
230 "paddsw "TAN3", "TAN3" \n\t" \ | |
231 "paddsw %%xmm3, %%xmm3 \n\t" \ | |
232 "movdqa "MANGLE(tan2)", %%xmm7 \n\t" \ | |
233 MOV_32_ONLY ROW2", "REG2" \n\t" \ | |
234 MOV_32_ONLY ROW6", "REG6" \n\t" \ | |
235 "movdqa %%xmm7, %%xmm5 \n\t" \ | |
236 "pmulhw "REG6", %%xmm7 \n\t" \ | |
237 "pmulhw "REG2", %%xmm5 \n\t" \ | |
238 "paddsw "REG2", %%xmm7 \n\t" \ | |
239 "psubsw "REG6", %%xmm5 \n\t" \ | |
240 MOV_32_ONLY ROW0", "REG0" \n\t" \ | |
241 MOV_32_ONLY ROW4", "REG4" \n\t" \ | |
242 MOV_32_ONLY" "TAN1", (%0) \n\t" \ | |
243 "movdqa "REG0", "XMMS" \n\t" \ | |
244 "psubsw "REG4", "REG0" \n\t" \ | |
245 "paddsw "XMMS", "REG4" \n\t" \ | |
246 "movdqa "REG4", "XMMS" \n\t" \ | |
247 "psubsw %%xmm7, "REG4" \n\t" \ | |
248 "paddsw "XMMS", %%xmm7 \n\t" \ | |
249 "movdqa "REG0", "XMMS" \n\t" \ | |
250 "psubsw %%xmm5, "REG0" \n\t" \ | |
251 "paddsw "XMMS", %%xmm5 \n\t" \ | |
252 "movdqa %%xmm5, "XMMS" \n\t" \ | |
253 "psubsw "TAN3", %%xmm5 \n\t" \ | |
254 "paddsw "XMMS", "TAN3" \n\t" \ | |
255 "movdqa "REG0", "XMMS" \n\t" \ | |
256 "psubsw %%xmm3, "REG0" \n\t" \ | |
257 "paddsw "XMMS", %%xmm3 \n\t" \ | |
258 MOV_32_ONLY" (%0), "TAN1" \n\t" \ | |
259 "psraw $6, %%xmm5 \n\t" \ | |
260 "psraw $6, "REG0" \n\t" \ | |
261 "psraw $6, "TAN3" \n\t" \ | |
262 "psraw $6, %%xmm3 \n\t" \ | |
263 "movdqa "TAN3", 1*16("dct") \n\t" \ | |
264 "movdqa %%xmm3, 2*16("dct") \n\t" \ | |
265 "movdqa "REG0", 5*16("dct") \n\t" \ | |
266 "movdqa %%xmm5, 6*16("dct") \n\t" \ | |
267 "movdqa %%xmm7, %%xmm0 \n\t" \ | |
268 "movdqa "REG4", %%xmm4 \n\t" \ | |
269 "psubsw %%xmm1, %%xmm7 \n\t" \ | |
270 "psubsw "TAN1", "REG4" \n\t" \ | |
271 "paddsw %%xmm0, %%xmm1 \n\t" \ | |
272 "paddsw %%xmm4, "TAN1" \n\t" \ | |
273 "psraw $6, %%xmm1 \n\t" \ | |
274 "psraw $6, %%xmm7 \n\t" \ | |
275 "psraw $6, "TAN1" \n\t" \ | |
276 "psraw $6, "REG4" \n\t" \ | |
277 "movdqa %%xmm1, ("dct") \n\t" \ | |
278 "movdqa "TAN1", 3*16("dct") \n\t" \ | |
279 "movdqa "REG4", 4*16("dct") \n\t" \ | |
280 "movdqa %%xmm7, 7*16("dct") \n\t" | |
281 | |
/*
 * iLLM_PASS_SPARSE(dct): reduced column pass used when rows 4-7 are zero.
 * It reads only row 1 (%xmm6), row 3 (%xmm4), ROW0 and ROW2, with tan3 /
 * tan1 preloaded in TAN3 / TAN1, and never touches ROW4-ROW7.
 * Output: eight 16-byte rows stored with movdqa at 0*16 ... 7*16 from the
 * base register `dct`, each shifted right arithmetically by 6 first.
 * As in iLLM_PASS, where MOV_32_ONLY is a real movdqa, TAN1 is spilled to
 * the literal address (%0) after row 0 has been loaded, and reloaded later.
 */
282 ///IDCT pass on columns, assuming rows 4-7 are zero. | |
283 #define iLLM_PASS_SPARSE(dct) \ | |
284 "pmulhw %%xmm4, "TAN3" \n\t" \ | |
285 "paddsw %%xmm4, "TAN3" \n\t" \ | |
286 "movdqa %%xmm6, %%xmm3 \n\t" \ | |
287 "pmulhw %%xmm6, "TAN1" \n\t" \ | |
288 "movdqa %%xmm4, %%xmm1 \n\t" \ | |
289 "psubsw %%xmm1, %%xmm3 \n\t" \ | |
290 "paddsw %%xmm6, %%xmm1 \n\t" \ | |
291 "movdqa "TAN1", %%xmm6 \n\t" \ | |
292 "psubsw "TAN3", "TAN1" \n\t" \ | |
293 "paddsw %%xmm6, "TAN3" \n\t" \ | |
294 "movdqa %%xmm3, %%xmm6 \n\t" \ | |
295 "psubsw "TAN3", %%xmm3 \n\t" \ | |
296 "paddsw %%xmm6, "TAN3" \n\t" \ | |
297 "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \ | |
298 "pmulhw %%xmm4, %%xmm3 \n\t" \ | |
299 "pmulhw %%xmm4, "TAN3" \n\t" \ | |
300 "paddsw "TAN3", "TAN3" \n\t" \ | |
301 "paddsw %%xmm3, %%xmm3 \n\t" \ | |
302 "movdqa "MANGLE(tan2)", %%xmm5 \n\t" \ | |
303 MOV_32_ONLY ROW2", "SREG2" \n\t" \ | |
304 "pmulhw "SREG2", %%xmm5 \n\t" \ | |
305 MOV_32_ONLY ROW0", "REG0" \n\t" \ | |
306 "movdqa "REG0", %%xmm6 \n\t" \ | |
307 "psubsw "SREG2", %%xmm6 \n\t" \ | |
308 "paddsw "REG0", "SREG2" \n\t" \ | |
309 MOV_32_ONLY" "TAN1", (%0) \n\t" \ | |
310 "movdqa "REG0", "XMMS" \n\t" \ | |
311 "psubsw %%xmm5, "REG0" \n\t" \ | |
312 "paddsw "XMMS", %%xmm5 \n\t" \ | |
313 "movdqa %%xmm5, "XMMS" \n\t" \ | |
314 "psubsw "TAN3", %%xmm5 \n\t" \ | |
315 "paddsw "XMMS", "TAN3" \n\t" \ | |
316 "movdqa "REG0", "XMMS" \n\t" \ | |
317 "psubsw %%xmm3, "REG0" \n\t" \ | |
318 "paddsw "XMMS", %%xmm3 \n\t" \ | |
319 MOV_32_ONLY" (%0), "TAN1" \n\t" \ | |
320 "psraw $6, %%xmm5 \n\t" \ | |
321 "psraw $6, "REG0" \n\t" \ | |
322 "psraw $6, "TAN3" \n\t" \ | |
323 "psraw $6, %%xmm3 \n\t" \ | |
324 "movdqa "TAN3", 1*16("dct") \n\t" \ | |
325 "movdqa %%xmm3, 2*16("dct") \n\t" \ | |
326 "movdqa "REG0", 5*16("dct") \n\t" \ | |
327 "movdqa %%xmm5, 6*16("dct") \n\t" \ | |
328 "movdqa "SREG2", %%xmm0 \n\t" \ | |
329 "movdqa %%xmm6, %%xmm4 \n\t" \ | |
330 "psubsw %%xmm1, "SREG2" \n\t" \ | |
331 "psubsw "TAN1", %%xmm6 \n\t" \ | |
332 "paddsw %%xmm0, %%xmm1 \n\t" \ | |
333 "paddsw %%xmm4, "TAN1" \n\t" \ | |
334 "psraw $6, %%xmm1 \n\t" \ | |
335 "psraw $6, "SREG2" \n\t" \ | |
336 "psraw $6, "TAN1" \n\t" \ | |
337 "psraw $6, %%xmm6 \n\t" \ | |
338 "movdqa %%xmm1, ("dct") \n\t" \ | |
339 "movdqa "TAN1", 3*16("dct") \n\t" \ | |
340 "movdqa %%xmm6, 4*16("dct") \n\t" \ | |
341 "movdqa "SREG2", 7*16("dct") \n\t" | |
342 | |
/**
 * In-place 8x8 inverse DCT of the coefficients at block.
 *
 * The block is addressed as eight 16-byte rows (0*16 ... 7*16 from block)
 * using movdqa, which requires block to be 16-byte aligned.
 *
 * Flow of the asm:
 *  - m127 is loaded into %mm0 for the zero-row tests.
 *  - Rows 0, 1 and 2 are always transformed with iMTX_MULT.
 *  - Rows 3 and 4 are tested; row 3 is transformed unless it is all zero, in
 *    which case ROW3 keeps the zero written by CLEAR_ODD.
 *  - Rows 5, 6 and 7 are tested (row 4's mask is still held in %ecx).
 *  - If rows 4-7 are all zero, iLLM_PASS_SPARSE does the column pass and
 *    the code jumps to label 6.
 *  - Otherwise execution enters at label 2, 3, 4 or 5 according to the first
 *    non-zero row among 4-7. From that row on, row 4 and row 5 are
 *    transformed whenever reached, while rows 6 and 7 are skipped when their
 *    mask is zero. The full iLLM_PASS then runs.
 *  - On non-x86-64 builds iLLM_HEAD is repeated before iLLM_PASS, because
 *    TAN3 / TAN1 are %xmm0 / %xmm2 there and iMTX_MULT overwrites them.
 *
 * NOTE(review): the clobber list names only the four general-purpose
 * registers and "memory"; the MMX and XMM registers written by the asm are
 * not listed, and no emms is issued here. Confirm that callers and the
 * target ABIs tolerate this.
 *
 * @param block coefficients on entry, transformed values on return
 */
343 inline void ff_idct_xvid_sse2(short *block) | |
344 { | |
345 __asm__ volatile( | |
346 "movq "MANGLE(m127)", %%mm0 \n\t" | |
347 iMTX_MULT("(%0)", MANGLE(iTab1), ROUND(walkenIdctRounders), PUT_EVEN(ROW0)) | |
348 iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1)) | |
349 iMTX_MULT("2*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+2*16), PUT_EVEN(ROW2)) | |
350 | |
351 TEST_TWO_ROWS("3*16(%0)", "4*16(%0)", "%%eax", "%%ecx", CLEAR_ODD(ROW3), CLEAR_EVEN(ROW4)) | |
352 JZ("%%eax", "1f") | |
353 iMTX_MULT("3*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+3*16), PUT_ODD(ROW3)) | |
354 | |
355 TEST_TWO_ROWS("5*16(%0)", "6*16(%0)", "%%eax", "%%edx", CLEAR_ODD(ROW5), CLEAR_EVEN(ROW6)) | |
356 TEST_ONE_ROW("7*16(%0)", "%%esi", CLEAR_ODD(ROW7)) | |
357 iLLM_HEAD | |
358 ASMALIGN(4) | |
359 JNZ("%%ecx", "2f") | |
360 JNZ("%%eax", "3f") | |
361 JNZ("%%edx", "4f") | |
362 JNZ("%%esi", "5f") | |
363 iLLM_PASS_SPARSE("%0") | |
364 "jmp 6f \n\t" | |
365 "2: \n\t" | |
366 iMTX_MULT("4*16(%0)", MANGLE(iTab1), "#", PUT_EVEN(ROW4)) | |
367 "3: \n\t" | |
368 iMTX_MULT("5*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+4*16), PUT_ODD(ROW5)) | |
369 JZ("%%edx", "1f") | |
370 "4: \n\t" | |
371 iMTX_MULT("6*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+5*16), PUT_EVEN(ROW6)) | |
372 JZ("%%esi", "1f") | |
373 "5: \n\t" | |
374 iMTX_MULT("7*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+5*16), PUT_ODD(ROW7)) | |
8590 | 375 #if !ARCH_X86_64 |
8430 | 376 iLLM_HEAD |
377 #endif | |
378 iLLM_PASS("%0") | |
379 "6: \n\t" | |
380 : "+r"(block) | |
381 : | |
382 : "%eax", "%ecx", "%edx", "%esi", "memory"); | |
383 } | |
384 | |
/**
 * Run ff_idct_xvid_sse2() on block in place, then hand the transformed block
 * to ff_put_pixels_clamped_mmx() together with dest and line_size.
 * That helper is declared elsewhere; presumably it stores the clamped result
 * into dest with a row stride of line_size (confirm against dsputil_mmx.h).
 *
 * @param dest      destination passed through to ff_put_pixels_clamped_mmx()
 * @param line_size stride passed through to ff_put_pixels_clamped_mmx()
 * @param block     coefficient block; overwritten by the inverse transform
 */
385 void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block) | |
386 { | |
387 ff_idct_xvid_sse2(block); | |
12435
fe78a4548d12
Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents:
11644
diff
changeset
|
388 ff_put_pixels_clamped_mmx(block, dest, line_size); |
8430 | 389 } |
390 | |
/**
 * Run ff_idct_xvid_sse2() on block in place, then hand the transformed block
 * to ff_add_pixels_clamped_mmx() together with dest and line_size.
 * That helper is declared elsewhere; presumably it adds the result to the
 * pixels already in dest with clamping (confirm against dsputil_mmx.h).
 *
 * @param dest      destination passed through to ff_add_pixels_clamped_mmx()
 * @param line_size stride passed through to ff_add_pixels_clamped_mmx()
 * @param block     coefficient block; overwritten by the inverse transform
 */
391 void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block) | |
392 { | |
393 ff_idct_xvid_sse2(block); | |
12435
fe78a4548d12
Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents:
11644
diff
changeset
|
394 ff_add_pixels_clamped_mmx(block, dest, line_size); |
8430 | 395 } |