Mercurial > libavcodec.hg
annotate x86/idct_sse2_xvid.c @ 10952:ea8f891d997d libavcodec
H264 DXVA2 implementation
It allows VLD H264 decoding using DXVA2 (the GPU-assisted decoding API under
Windows Vista and Windows 7).
It is implemented using the AVHWAccel API. It has been tested successfully
for some time in VLC using an NVIDIA card on Windows 7.
To compile it, you need the system header dxva2api.h (either from
Microsoft or from http://downloads.videolan.org/pub/videolan/testing/contrib/dxva2api.h).
The generated libavcodec.dll does not depend directly on any new library, as
the necessary objects are provided by the application using FFmpeg.
author | fenrir |
---|---|
date | Wed, 20 Jan 2010 18:54:51 +0000 |
parents | 8b9fc0c8f1cc |
children | 34a65026fa06 |
rev | line source |
---|---|
8430 | 1 /* |
2 * XVID MPEG-4 VIDEO CODEC | |
3 * - SSE2 inverse discrete cosine transform - | |
4 * | |
5 * Copyright(C) 2003 Pascal Massimino <skal@planet-d.net> | |
6 * | |
7 * Conversion to gcc syntax with modifications | |
8 * by Alexander Strange <astrange@ithinksw.com> | |
9 * | |
10 * Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid. | |
11 * | |
12 * This file is part of FFmpeg. | |
13 * | |
14 * Vertical pass is an implementation of the scheme: | |
15 * Loeffler C., Ligtenberg A., and Moschytz C.S.: | |
16 * Practical Fast 1D DCT Algorithm with Eleven Multiplications, | |
17 * Proc. ICASSP 1989, 988-991. | |
18 * | |
19 * Horizontal pass is a double 4x4 vector/matrix multiplication, | |
20 * (see also Intel's Application Note 922: | |
21 * http://developer.intel.com/vtune/cbts/strmsimd/922down.htm | |
22 * Copyright (C) 1999 Intel Corporation) | |
23 * | |
24 * More details at http://skal.planet-d.net/coding/dct.html | |
25 * | |
26 * FFmpeg is free software; you can redistribute it and/or | |
27 * modify it under the terms of the GNU Lesser General Public | |
28 * License as published by the Free Software Foundation; either | |
29 * version 2.1 of the License, or (at your option) any later version. | |
30 * | |
31 * FFmpeg is distributed in the hope that it will be useful, | |
32 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
33 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
34 * Lesser General Public License for more details. | |
35 * | |
36 * You should have received a copy of the GNU Lesser General Public License | |
37 * along with FFmpeg; if not, write to the Free Software Foundation, | |
38 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
39 */ | |
40 | |
41 #include "libavcodec/dsputil.h" | |
42 #include "idct_xvid.h" | |
10114
8b9fc0c8f1cc
Move declarations of some mmx functions to dsputil_mmx.h
mru
parents:
8718
diff
changeset
|
43 #include "dsputil_mmx.h" |
8430 | 44 |
45 /*! | |
8718
e9d9d946f213
Use full internal pathname in doxygen @file directives.
diego
parents:
8590
diff
changeset
|
46 * @file libavcodec/x86/idct_sse2_xvid.c |
8430 | 47 * @brief SSE2 idct compatible with xvidmmx |
48 */ | |
49 | |
/* X8(x): expands to its argument repeated eight times, comma-separated; used below to fill the 8-element constant tables. */
50 #define X8(x) x,x,x,x,x,x,x,x | |
51 | |
/* Shift amounts of the row and column passes. Neither macro is referenced by the code visible in this file: iMTX_MULT uses a literal "psrad $11" and the column passes use a literal "psraw $6". */
52 #define ROW_SHIFT 11 | |
53 #define COL_SHIFT 6 | |
54 | |
/*
 * Constants for the column pass, each replicated across all eight lanes with
 * X8(). tan3 and tan1 are loaded by iLLM_HEAD; sqrt2 and tan2 are loaded
 * inside iLLM_PASS / iLLM_PASS_SPARSE. m127 is loaded into %mm0 at the start
 * of ff_idct_xvid_sse2 and is used by the TEST_* macros to detect non-zero rows.
 * NOTE(review): 43790 does not fit in int16_t, so the stored tan3 value relies
 * on the implementation's conversion (presumably wrapping to -21746, which
 * would match the "-1" in its comment) - confirm.
 * The first DECLARE_ASM_CONST argument is presumably the alignment in bytes;
 * the macro is defined outside this file.
 */
55 DECLARE_ASM_CONST(16, int16_t, tan1[]) = {X8(13036)}; // tan( pi/16) | |
56 DECLARE_ASM_CONST(16, int16_t, tan2[]) = {X8(27146)}; // tan(2pi/16) = sqrt(2)-1 | |
57 DECLARE_ASM_CONST(16, int16_t, tan3[]) = {X8(43790)}; // tan(3pi/16)-1 | |
58 DECLARE_ASM_CONST(16, int16_t, sqrt2[])= {X8(23170)}; // 0.5/sqrt(2) | |
59 DECLARE_ASM_CONST(8, uint8_t, m127[]) = {X8(127)}; | |
60 | |
/*
 * Coefficient tables for the row pass. Each table holds 32 16-bit entries
 * (64 bytes), which iMTX_MULT consumes as four 16-byte pmaddwd operands at
 * byte offsets 0, 16, 32 and 48.
 * ff_idct_xvid_sse2 uses iTab1 for rows 0 and 4, iTab2 for rows 1 and 7,
 * iTab3 for rows 2 and 6, and iTab4 for rows 3 and 5.
 * NOTE(review): entries >= 0x8000 are out of range for int16_t and rely on the
 * implementation's conversion (presumably wrapping to negative values).
 */
61 DECLARE_ASM_CONST(16, int16_t, iTab1[]) = { | |
62 0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d, | |
63 0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61, | |
64 0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7, | |
65 0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b | |
66 }; | |
67 | |
68 DECLARE_ASM_CONST(16, int16_t, iTab2[]) = { | |
69 0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5, | |
70 0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04, | |
71 0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41, | |
72 0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df | |
73 }; | |
74 | |
75 DECLARE_ASM_CONST(16, int16_t, iTab3[]) = { | |
76 0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf, | |
77 0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf, | |
78 0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d, | |
79 0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04 | |
80 }; | |
81 | |
82 DECLARE_ASM_CONST(16, int16_t, iTab4[]) = { | |
83 0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746, | |
84 0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac, | |
85 0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df, | |
86 0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e | |
87 }; | |
88 | |
/*
 * Rounding constants added (via ROUND()) during the row pass, before the
 * right shift by 11. Each group is four identical 32-bit values (16 bytes)
 * and is selected in the asm operand with an offset of k*16.
 * As used by ff_idct_xvid_sse2: group 0 -> row 0, group 1 -> row 1,
 * group 2 -> row 2, group 3 -> row 3, group 4 -> row 5, group 5 -> rows 6
 * and 7. Row 4 is processed without a rounder.
 */
89 DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders[]) = { | |
90 65536, 65536, 65536, 65536, | |
91 3597, 3597, 3597, 3597, | |
92 2260, 2260, 2260, 2260, | |
93 1203, 1203, 1203, 1203, | |
94 120, 120, 120, 120, | |
95 512, 512, 512, 512 | |
96 }; | |
97 | |
98 // Temporary storage before the column pass | |
/* The odd rows are kept in these XMM registers in both the 32-bit and the 64-bit build. */
99 #define ROW1 "%%xmm6" | |
100 #define ROW3 "%%xmm4" | |
101 #define ROW5 "%%xmm5" | |
102 #define ROW7 "%%xmm7" | |
103 | |
/* CLEAR_ODD(r): zero register r (pxor r,r). */
/*
 * PUT_ODD(dst): copy the row-pass result from %xmm2 to register dst, reversing
 * the order of the four high words (pshufhw $0x1B); the four low words are
 * copied unchanged. Presumably needed because the differences computed by
 * iMTX_MULT come out in reverse order - confirm.
 */
104 #define CLEAR_ODD(r) "pxor "r","r" \n\t" | |
105 #define PUT_ODD(dst) "pshufhw $0x1B, %%xmm2, "dst" \n\t" | |
106 | |
/*
 * Storage and register assignment for the even rows and for the column-pass
 * scratch registers.
 *   ROWn        - where row n is kept after the row pass
 *   REGn        - register holding row n inside the column pass
 *   CLEAR_EVEN  - zeroes an even row before it is tested
 *   PUT_EVEN    - stores a row-pass result (from %xmm2) as an even row
 *   XMMS        - scratch register for the column-pass butterflies
 *   SREG2       - register holding row 2 in iLLM_PASS_SPARSE
 *   TAN3/TAN1   - registers loaded with the tan3/tan1 constants by iLLM_HEAD
 *   MOV_32_ONLY - instruction prefix for moves needed only in the 32-bit build
 */
8590 | 107 #if ARCH_X86_64 |
8430 | 108 |
/*
 * x86-64: xmm8-xmm14 are used, so the even rows stay in registers and REGn is
 * the same register as ROWn. MOV_32_ONLY expands to "#", which makes the
 * assembler treat the rest of that asm line as a comment.
 */
109 # define ROW0 "%%xmm8" | |
110 # define REG0 ROW0 | |
111 # define ROW2 "%%xmm9" | |
112 # define REG2 ROW2 | |
113 # define ROW4 "%%xmm10" | |
114 # define REG4 ROW4 | |
115 # define ROW6 "%%xmm11" | |
116 # define REG6 ROW6 | |
117 # define CLEAR_EVEN(r) CLEAR_ODD(r) | |
118 # define PUT_EVEN(dst) PUT_ODD(dst) | |
119 # define XMMS "%%xmm12" | |
120 # define MOV_32_ONLY "#" | |
121 # define SREG2 REG2 | |
122 # define TAN3 "%%xmm13" | |
123 # define TAN1 "%%xmm14" | |
124 | |
125 #else | |
126 | |
/*
 * 32-bit x86: the even rows are written back into the block in memory (%0 is
 * the block pointer) and reloaded by the column pass through MOV_32_ONLY.
 * CLEAR_EVEN expands to nothing (presumably because a row that tests as all
 * zero is already zero in memory). TAN1 shares %xmm2 with XMMS, which is why
 * the column passes spill TAN1 to (%0).
 */
127 # define ROW0 "(%0)" | |
128 # define REG0 "%%xmm4" | |
129 # define ROW2 "2*16(%0)" | |
130 # define REG2 "%%xmm4" | |
131 # define ROW4 "4*16(%0)" | |
132 # define REG4 "%%xmm6" | |
133 # define ROW6 "6*16(%0)" | |
134 # define REG6 "%%xmm6" | |
135 # define CLEAR_EVEN(r) | |
136 # define PUT_EVEN(dst) \ | |
137 "pshufhw $0x1B, %%xmm2, %%xmm2 \n\t" \ | |
138 "movdqa %%xmm2, "dst" \n\t" | |
139 # define XMMS "%%xmm2" | |
140 # define MOV_32_ONLY "movdqa " | |
141 # define SREG2 "%%xmm7" | |
142 # define TAN3 "%%xmm0" | |
143 # define TAN1 "%%xmm2" | |
144 | |
145 #endif | |
146 | |
/*
 * ROUND(x): the beginning of a "paddd <x>" instruction; iMTX_MULT appends
 * ", %%xmm0" as the destination operand.
 */
147 #define ROUND(x) "paddd "MANGLE(x) | |
148 | |
/* JZ(reg, to): jump to label `to` if the 32-bit register reg is zero. */
149 #define JZ(reg, to) \ | |
150 "testl "reg","reg" \n\t" \ | |
151 "jz "to" \n\t" | |
152 | |
/* JNZ(reg, to): jump to label `to` if the 32-bit register reg is non-zero. */
153 #define JNZ(reg, to) \ | |
154 "testl "reg","reg" \n\t" \ | |
155 "jnz "to" \n\t" | |
156 | |
/*
 * TEST_ONE_ROW(src, reg, clear): emit `clear`, then leave a non-zero value in
 * reg iff the 16-byte row at src contains any non-zero byte. The two 8-byte
 * halves are ORed together, 127 is added to every byte with unsigned
 * saturation (%mm0 must hold m127), and pmovmskb collects the resulting top
 * bits. Clobbers %mm1.
 */
157 #define TEST_ONE_ROW(src, reg, clear) \ | |
158 clear \ | |
159 "movq "src", %%mm1 \n\t" \ | |
160 "por 8+"src", %%mm1 \n\t" \ | |
161 "paddusb %%mm0, %%mm1 \n\t" \ | |
162 "pmovmskb %%mm1, "reg" \n\t" | |
163 | |
/*
 * TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2): the same test as
 * TEST_ONE_ROW for two rows at once; reg1 reports row1 and reg2 reports row2.
 * Clobbers %mm1 and %mm2.
 */
164 #define TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2) \ | |
165 clear1 \ | |
166 clear2 \ | |
167 "movq "row1", %%mm1 \n\t" \ | |
168 "por 8+"row1", %%mm1 \n\t" \ | |
169 "movq "row2", %%mm2 \n\t" \ | |
170 "por 8+"row2", %%mm2 \n\t" \ | |
171 "paddusb %%mm0, %%mm1 \n\t" \ | |
172 "paddusb %%mm0, %%mm2 \n\t" \ | |
173 "pmovmskb %%mm1, "reg1" \n\t" \ | |
174 "pmovmskb %%mm2, "reg2" \n\t" | |
175 | |
176 ///IDCT pass on rows. | |
/*
 * iMTX_MULT(src, table, rounder, put)
 *   src     - memory operand of the 16-byte input row
 *   table   - 64-byte coefficient table (iTab1..iTab4)
 *   rounder - text placed in front of ", %%xmm0": either ROUND(...) to add a
 *             rounding constant, or "#" to turn the line into an assembler
 *             comment (no rounding)
 *   put     - PUT_EVEN()/PUT_ODD() sequence that moves the result out of %xmm2
 * The 32-bit sums and differences are shifted right by 11 and packed with
 * signed saturation into %xmm2. Clobbers %xmm0-%xmm3.
 * The macro ends with the local label "1:", so a JZ(reg, "1f") placed directly
 * in front of it skips the whole multiplication.
 */
177 #define iMTX_MULT(src, table, rounder, put) \ | |
178 "movdqa "src", %%xmm3 \n\t" \ | |
179 "movdqa %%xmm3, %%xmm0 \n\t" \ | |
180 "pshufd $0x11, %%xmm3, %%xmm1 \n\t" /* 4602 */ \ | |
181 "punpcklqdq %%xmm0, %%xmm0 \n\t" /* 0246 */ \ | |
182 "pmaddwd "table", %%xmm0 \n\t" \ | |
183 "pmaddwd 16+"table", %%xmm1 \n\t" \ | |
184 "pshufd $0xBB, %%xmm3, %%xmm2 \n\t" /* 5713 */ \ | |
185 "punpckhqdq %%xmm3, %%xmm3 \n\t" /* 1357 */ \ | |
186 "pmaddwd 32+"table", %%xmm2 \n\t" \ | |
187 "pmaddwd 48+"table", %%xmm3 \n\t" \ | |
188 "paddd %%xmm1, %%xmm0 \n\t" \ | |
189 "paddd %%xmm3, %%xmm2 \n\t" \ | |
190 rounder", %%xmm0 \n\t" \ | |
191 "movdqa %%xmm2, %%xmm3 \n\t" \ | |
192 "paddd %%xmm0, %%xmm2 \n\t" \ | |
193 "psubd %%xmm3, %%xmm0 \n\t" \ | |
194 "psrad $11, %%xmm2 \n\t" \ | |
195 "psrad $11, %%xmm0 \n\t" \ | |
196 "packssdw %%xmm0, %%xmm2 \n\t" \ | |
197 put \ | |
198 "1: \n\t" | |
199 | |
/* iLLM_HEAD: load the tan3 and tan1 constant vectors into the TAN3 and TAN1 registers used by the column pass. */
200 #define iLLM_HEAD \ | |
201 "movdqa "MANGLE(tan3)", "TAN3" \n\t" \ | |
202 "movdqa "MANGLE(tan1)", "TAN1" \n\t" \ | |
203 | |
204 ///IDCT pass on columns. | |
/*
 * iLLM_PASS(dct): full column pass over all eight rows.
 * Inputs: odd rows in %xmm6 (row 1), %xmm4 (row 3), %xmm5 (row 5) and
 * %xmm7 (row 7); even rows in ROW0/ROW2/ROW4/ROW6; the tan3/tan1 constants in
 * TAN3/TAN1 (see iLLM_HEAD). sqrt2 and tan2 are loaded here.
 * Every result is shifted right by 6 and stored with movdqa to k*16(dct),
 * k = 0..7, where dct is the operand string naming the block pointer.
 * In the 32-bit build TAN1 is spilled to (%0) after row 0 has been loaded and
 * restored later, because TAN1 and XMMS share a register there; in the 64-bit
 * build those MOV_32_ONLY lines are commented out.
 */
205 #define iLLM_PASS(dct) \ | |
206 "movdqa "TAN3", %%xmm1 \n\t" \ | |
207 "movdqa "TAN1", %%xmm3 \n\t" \ | |
208 "pmulhw %%xmm4, "TAN3" \n\t" \ | |
209 "pmulhw %%xmm5, %%xmm1 \n\t" \ | |
210 "paddsw %%xmm4, "TAN3" \n\t" \ | |
211 "paddsw %%xmm5, %%xmm1 \n\t" \ | |
212 "psubsw %%xmm5, "TAN3" \n\t" \ | |
213 "paddsw %%xmm4, %%xmm1 \n\t" \ | |
214 "pmulhw %%xmm7, %%xmm3 \n\t" \ | |
215 "pmulhw %%xmm6, "TAN1" \n\t" \ | |
216 "paddsw %%xmm6, %%xmm3 \n\t" \ | |
217 "psubsw %%xmm7, "TAN1" \n\t" \ | |
218 "movdqa %%xmm3, %%xmm7 \n\t" \ | |
219 "movdqa "TAN1", %%xmm6 \n\t" \ | |
220 "psubsw %%xmm1, %%xmm3 \n\t" \ | |
221 "psubsw "TAN3", "TAN1" \n\t" \ | |
222 "paddsw %%xmm7, %%xmm1 \n\t" \ | |
223 "paddsw %%xmm6, "TAN3" \n\t" \ | |
224 "movdqa %%xmm3, %%xmm6 \n\t" \ | |
225 "psubsw "TAN3", %%xmm3 \n\t" \ | |
226 "paddsw %%xmm6, "TAN3" \n\t" \ | |
227 "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \ | |
228 "pmulhw %%xmm4, %%xmm3 \n\t" \ | |
229 "pmulhw %%xmm4, "TAN3" \n\t" \ | |
230 "paddsw "TAN3", "TAN3" \n\t" \ | |
231 "paddsw %%xmm3, %%xmm3 \n\t" \ | |
232 "movdqa "MANGLE(tan2)", %%xmm7 \n\t" \ | |
233 MOV_32_ONLY ROW2", "REG2" \n\t" \ | |
234 MOV_32_ONLY ROW6", "REG6" \n\t" \ | |
235 "movdqa %%xmm7, %%xmm5 \n\t" \ | |
236 "pmulhw "REG6", %%xmm7 \n\t" \ | |
237 "pmulhw "REG2", %%xmm5 \n\t" \ | |
238 "paddsw "REG2", %%xmm7 \n\t" \ | |
239 "psubsw "REG6", %%xmm5 \n\t" \ | |
240 MOV_32_ONLY ROW0", "REG0" \n\t" \ | |
241 MOV_32_ONLY ROW4", "REG4" \n\t" \ | |
242 MOV_32_ONLY" "TAN1", (%0) \n\t" \ | |
243 "movdqa "REG0", "XMMS" \n\t" \ | |
244 "psubsw "REG4", "REG0" \n\t" \ | |
245 "paddsw "XMMS", "REG4" \n\t" \ | |
246 "movdqa "REG4", "XMMS" \n\t" \ | |
247 "psubsw %%xmm7, "REG4" \n\t" \ | |
248 "paddsw "XMMS", %%xmm7 \n\t" \ | |
249 "movdqa "REG0", "XMMS" \n\t" \ | |
250 "psubsw %%xmm5, "REG0" \n\t" \ | |
251 "paddsw "XMMS", %%xmm5 \n\t" \ | |
252 "movdqa %%xmm5, "XMMS" \n\t" \ | |
253 "psubsw "TAN3", %%xmm5 \n\t" \ | |
254 "paddsw "XMMS", "TAN3" \n\t" \ | |
255 "movdqa "REG0", "XMMS" \n\t" \ | |
256 "psubsw %%xmm3, "REG0" \n\t" \ | |
257 "paddsw "XMMS", %%xmm3 \n\t" \ | |
258 MOV_32_ONLY" (%0), "TAN1" \n\t" \ | |
259 "psraw $6, %%xmm5 \n\t" \ | |
260 "psraw $6, "REG0" \n\t" \ | |
261 "psraw $6, "TAN3" \n\t" \ | |
262 "psraw $6, %%xmm3 \n\t" \ | |
263 "movdqa "TAN3", 1*16("dct") \n\t" \ | |
264 "movdqa %%xmm3, 2*16("dct") \n\t" \ | |
265 "movdqa "REG0", 5*16("dct") \n\t" \ | |
266 "movdqa %%xmm5, 6*16("dct") \n\t" \ | |
267 "movdqa %%xmm7, %%xmm0 \n\t" \ | |
268 "movdqa "REG4", %%xmm4 \n\t" \ | |
269 "psubsw %%xmm1, %%xmm7 \n\t" \ | |
270 "psubsw "TAN1", "REG4" \n\t" \ | |
271 "paddsw %%xmm0, %%xmm1 \n\t" \ | |
272 "paddsw %%xmm4, "TAN1" \n\t" \ | |
273 "psraw $6, %%xmm1 \n\t" \ | |
274 "psraw $6, %%xmm7 \n\t" \ | |
275 "psraw $6, "TAN1" \n\t" \ | |
276 "psraw $6, "REG4" \n\t" \ | |
277 "movdqa %%xmm1, ("dct") \n\t" \ | |
278 "movdqa "TAN1", 3*16("dct") \n\t" \ | |
279 "movdqa "REG4", 4*16("dct") \n\t" \ | |
280 "movdqa %%xmm7, 7*16("dct") \n\t" | |
281 | |
282 ///IDCT pass on columns, assuming rows 4-7 are zero. | |
/*
 * iLLM_PASS_SPARSE(dct): shortened column pass used when rows 4-7 are all zero.
 * Reads only row 1 (%xmm6), row 3 (%xmm4), row 0 (ROW0, via REG0) and row 2
 * (ROW2, via SREG2), plus the constants in TAN3/TAN1 (see iLLM_HEAD); sqrt2 and
 * tan2 are loaded here.
 * Like iLLM_PASS, every result is shifted right by 6 and stored with movdqa to
 * k*16(dct), k = 0..7, and in the 32-bit build TAN1 is temporarily spilled to
 * (%0) after row 0 has been loaded.
 */
283 #define iLLM_PASS_SPARSE(dct) \ | |
284 "pmulhw %%xmm4, "TAN3" \n\t" \ | |
285 "paddsw %%xmm4, "TAN3" \n\t" \ | |
286 "movdqa %%xmm6, %%xmm3 \n\t" \ | |
287 "pmulhw %%xmm6, "TAN1" \n\t" \ | |
288 "movdqa %%xmm4, %%xmm1 \n\t" \ | |
289 "psubsw %%xmm1, %%xmm3 \n\t" \ | |
290 "paddsw %%xmm6, %%xmm1 \n\t" \ | |
291 "movdqa "TAN1", %%xmm6 \n\t" \ | |
292 "psubsw "TAN3", "TAN1" \n\t" \ | |
293 "paddsw %%xmm6, "TAN3" \n\t" \ | |
294 "movdqa %%xmm3, %%xmm6 \n\t" \ | |
295 "psubsw "TAN3", %%xmm3 \n\t" \ | |
296 "paddsw %%xmm6, "TAN3" \n\t" \ | |
297 "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \ | |
298 "pmulhw %%xmm4, %%xmm3 \n\t" \ | |
299 "pmulhw %%xmm4, "TAN3" \n\t" \ | |
300 "paddsw "TAN3", "TAN3" \n\t" \ | |
301 "paddsw %%xmm3, %%xmm3 \n\t" \ | |
302 "movdqa "MANGLE(tan2)", %%xmm5 \n\t" \ | |
303 MOV_32_ONLY ROW2", "SREG2" \n\t" \ | |
304 "pmulhw "SREG2", %%xmm5 \n\t" \ | |
305 MOV_32_ONLY ROW0", "REG0" \n\t" \ | |
306 "movdqa "REG0", %%xmm6 \n\t" \ | |
307 "psubsw "SREG2", %%xmm6 \n\t" \ | |
308 "paddsw "REG0", "SREG2" \n\t" \ | |
309 MOV_32_ONLY" "TAN1", (%0) \n\t" \ | |
310 "movdqa "REG0", "XMMS" \n\t" \ | |
311 "psubsw %%xmm5, "REG0" \n\t" \ | |
312 "paddsw "XMMS", %%xmm5 \n\t" \ | |
313 "movdqa %%xmm5, "XMMS" \n\t" \ | |
314 "psubsw "TAN3", %%xmm5 \n\t" \ | |
315 "paddsw "XMMS", "TAN3" \n\t" \ | |
316 "movdqa "REG0", "XMMS" \n\t" \ | |
317 "psubsw %%xmm3, "REG0" \n\t" \ | |
318 "paddsw "XMMS", %%xmm3 \n\t" \ | |
319 MOV_32_ONLY" (%0), "TAN1" \n\t" \ | |
320 "psraw $6, %%xmm5 \n\t" \ | |
321 "psraw $6, "REG0" \n\t" \ | |
322 "psraw $6, "TAN3" \n\t" \ | |
323 "psraw $6, %%xmm3 \n\t" \ | |
324 "movdqa "TAN3", 1*16("dct") \n\t" \ | |
325 "movdqa %%xmm3, 2*16("dct") \n\t" \ | |
326 "movdqa "REG0", 5*16("dct") \n\t" \ | |
327 "movdqa %%xmm5, 6*16("dct") \n\t" \ | |
328 "movdqa "SREG2", %%xmm0 \n\t" \ | |
329 "movdqa %%xmm6, %%xmm4 \n\t" \ | |
330 "psubsw %%xmm1, "SREG2" \n\t" \ | |
331 "psubsw "TAN1", %%xmm6 \n\t" \ | |
332 "paddsw %%xmm0, %%xmm1 \n\t" \ | |
333 "paddsw %%xmm4, "TAN1" \n\t" \ | |
334 "psraw $6, %%xmm1 \n\t" \ | |
335 "psraw $6, "SREG2" \n\t" \ | |
336 "psraw $6, "TAN1" \n\t" \ | |
337 "psraw $6, %%xmm6 \n\t" \ | |
338 "movdqa %%xmm1, ("dct") \n\t" \ | |
339 "movdqa "TAN1", 3*16("dct") \n\t" \ | |
340 "movdqa %%xmm6, 4*16("dct") \n\t" \ | |
341 "movdqa "SREG2", 7*16("dct") \n\t" | |
342 | |
/**
 * In-place 8x8 inverse DCT using SSE2 (MMX is used for the zero-row tests).
 *
 * @param block 64 coefficients, accessed as eight 16-byte rows and overwritten
 *              with the result. The rows are accessed with movdqa, so block
 *              must be 16-byte aligned.
 *
 * Flow: rows 0-2 always go through the row pass; row 3 is skipped when it is
 * all zero. Rows 4-7 are then tested: if all four are zero the sparse column
 * pass is used, otherwise execution enters the chain of row passes at the
 * first non-zero row (labels 2-5) and finishes with the full column pass.
 * Once the chain is entered at row 4, row 5 is always processed; rows 6 and 7
 * are still skipped individually when they are zero.
 *
 * NOTE(review): the asm writes %mm0-%mm2 and XMM registers, but the clobber
 * list names only eax/ecx/edx/esi and "memory", and no emms is issued here.
 */
343 inline void ff_idct_xvid_sse2(short *block) | |
344 { | |
345 __asm__ volatile( | |
/* %mm0 = 127 in every byte; required by the TEST_* macros below. */
346 "movq "MANGLE(m127)", %%mm0 \n\t" | |
347 iMTX_MULT("(%0)", MANGLE(iTab1), ROUND(walkenIdctRounders), PUT_EVEN(ROW0)) | |
348 iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1)) | |
349 iMTX_MULT("2*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+2*16), PUT_EVEN(ROW2)) | |
350 | |
/* eax reports row 3, ecx reports row 4; the JZ skips the row-3 multiply (ROW3 was zeroed by CLEAR_ODD). */
351 TEST_TWO_ROWS("3*16(%0)", "4*16(%0)", "%%eax", "%%ecx", CLEAR_ODD(ROW3), CLEAR_EVEN(ROW4)) | |
352 JZ("%%eax", "1f") | |
353 iMTX_MULT("3*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+3*16), PUT_ODD(ROW3)) | |
354 | |
/* eax now reports row 5, edx row 6 and esi row 7. */
355 TEST_TWO_ROWS("5*16(%0)", "6*16(%0)", "%%eax", "%%edx", CLEAR_ODD(ROW5), CLEAR_EVEN(ROW6)) | |
356 TEST_ONE_ROW("7*16(%0)", "%%esi", CLEAR_ODD(ROW7)) | |
357 iLLM_HEAD | |
358 ASMALIGN(4) | |
359 JNZ("%%ecx", "2f") | |
360 JNZ("%%eax", "3f") | |
361 JNZ("%%edx", "4f") | |
362 JNZ("%%esi", "5f") | |
/* Rows 4-7 are all zero: use the shortened column pass and skip to the end. */
363 iLLM_PASS_SPARSE("%0") | |
364 "jmp 6f \n\t" | |
365 "2: \n\t" | |
/* Row 4 is processed without a rounder: "#" comments out the paddd line. */
366 iMTX_MULT("4*16(%0)", MANGLE(iTab1), "#", PUT_EVEN(ROW4)) | |
367 "3: \n\t" | |
368 iMTX_MULT("5*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+4*16), PUT_ODD(ROW5)) | |
369 JZ("%%edx", "1f") | |
370 "4: \n\t" | |
371 iMTX_MULT("6*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+5*16), PUT_EVEN(ROW6)) | |
372 JZ("%%esi", "1f") | |
373 "5: \n\t" | |
374 iMTX_MULT("7*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+5*16), PUT_ODD(ROW7)) | |
/* In the 32-bit build TAN3/TAN1 live in %xmm0/%xmm2, which iMTX_MULT overwrites, so the constants are loaded again. */
8590 | 375 #if !ARCH_X86_64 |
8430 | 376 iLLM_HEAD |
377 #endif | |
378 iLLM_PASS("%0") | |
379 "6: \n\t" | |
380 : "+r"(block) | |
381 : | |
382 : "%eax", "%ecx", "%edx", "%esi", "memory"); | |
383 } | |
384 | |
/**
 * Run ff_idct_xvid_sse2() on block in place, then pass the transformed block
 * to put_pixels_clamped_mmx() together with dest and line_size.
 * put_pixels_clamped_mmx() is declared outside this file; presumably it stores
 * the clamped samples to dest with a stride of line_size - confirm.
 */
385 void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block) | |
386 { | |
387 ff_idct_xvid_sse2(block); | |
388 put_pixels_clamped_mmx(block, dest, line_size); | |
389 } | |
390 | |
/**
 * Run ff_idct_xvid_sse2() on block in place, then pass the transformed block
 * to add_pixels_clamped_mmx() together with dest and line_size.
 * add_pixels_clamped_mmx() is declared outside this file; presumably it adds
 * the samples to dest with clamping, using a stride of line_size - confirm.
 */
391 void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block) | |
392 { | |
393 ff_idct_xvid_sse2(block); | |
394 add_pixels_clamped_mmx(block, dest, line_size); | |
395 } | |