Mercurial > libavcodec.hg
annotate i386/dsputil_mmx.c @ 118:3dd1950ac98d libavcodec
brightness / contrast fix/copy optimizations +2% speedup
author | michael |
---|---|
date | Tue, 23 Oct 2001 01:46:50 +0000 |
parents | 8068c4bce9c1 |
children | ae0516eadae2 |
rev | line source |
---|---|
0 | 1 /* |
2 * MMX optimized DSP utils | |
3 * Copyright (c) 2000, 2001 Gerard Lantau. | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
18 * | |
19 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |
20 */ | |
21 | |
22 #include "../dsputil.h" | |
23 | |
/* CPU capability bits; assigned from mm_support() in dsputil_init_mmx()
   and tested there against the MM_* masks. */
5 | 24 int mm_flags; /* multimedia extension flags */ |
25 | |
/* pix_abs16x16 variants: only declared here (no definition in this file);
   dsputil_init_mmx() installs them into the pix_abs16x16* pointers. */
0 | 26 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); |
27 int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h); | |
28 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); | |
29 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); | |
30 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); | |
31 | |
/* One of these two is installed as ff_idct by dsputil_init_mmx(),
   depending on whether MM_MMXEXT is set. */
42 | 32 /* external functions, from idct_mmx.c */ |
33 void ff_mmx_idct(DCTELEM *block); | |
34 void ff_mmxext_idct(DCTELEM *block); | |
19
82d4c9be9873
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
arpi_esp
parents:
8
diff
changeset
|
35 |
0 | 36 /* pixel operations */ |
/*
 * Packed 16-bit rounding constants, loaded into an MMX register with movq:
 * mm_wone holds four words equal to 1, mm_wtwo four words equal to 2
 * (equivalent to unsigned short[4] = { 1, 1, 1, 1 } / { 2, 2, 2, 2 }).
 * The ULL suffix gives the literals an explicit 64-bit type.
 */
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002ULL;
0 | 41 |
42 /***********************************/ | |
43 /* 3Dnow specific */ | |
44 | |
/* dsputil_mmx_avg.h is included twice below, each time with a different
   DEF() suffix and PAVGB mnemonic. DEF(x) pastes the suffix onto x, so the
   first inclusion presumably yields the *_3dnow functions that
   dsputil_init_mmx() installs (the header itself is not visible here). */
45 #define DEF(x) x ## _3dnow | |
46 /* for Athlons PAVGUSB is preferred */ | |
47 #define PAVGB "pavgusb" | |
48 | |
49 #include "dsputil_mmx_avg.h" | |
50 | |
/* Reset both macros so the next inclusion can redefine them. */
51 #undef DEF | |
52 #undef PAVGB | |
53 | |
54 /***********************************/ | |
55 /* MMX2 specific */ | |
56 | |
/* Second inclusion: same header, "_sse" suffix and the "pavgb" mnemonic. */
57 #define DEF(x) x ## _sse | |
58 | |
59 /* Introduced only in MMX2 set */ | |
60 #define PAVGB "pavgb" | |
61 | |
62 #include "dsputil_mmx_avg.h" | |
63 | |
64 #undef DEF | |
65 #undef PAVGB | |
66 | |
67 /***********************************/ | |
68 /* standard MMX */ | |
69 | |
70 static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) | |
71 { | |
72 DCTELEM *p; | |
73 const UINT8 *pix; | |
74 int i; | |
75 | |
76 /* read the pixels */ | |
77 p = block; | |
78 pix = pixels; | |
79 __asm __volatile("pxor %%mm7, %%mm7":::"memory"); | |
80 for(i=0;i<4;i++) { | |
81 __asm __volatile( | |
82 "movq %1, %%mm0\n\t" | |
83 "movq %2, %%mm1\n\t" | |
84 "movq %%mm0, %%mm2\n\t" | |
85 "movq %%mm1, %%mm3\n\t" | |
86 "punpcklbw %%mm7, %%mm0\n\t" | |
87 "punpckhbw %%mm7, %%mm2\n\t" | |
88 "punpcklbw %%mm7, %%mm1\n\t" | |
89 "punpckhbw %%mm7, %%mm3\n\t" | |
90 "movq %%mm0, %0\n\t" | |
91 "movq %%mm2, 8%0\n\t" | |
92 "movq %%mm1, 16%0\n\t" | |
93 "movq %%mm3, 24%0\n\t" | |
94 :"=m"(*p) | |
95 :"m"(*pix), "m"(*(pix+line_size)) | |
96 :"memory"); | |
97 pix += line_size*2; | |
98 p += 16; | |
99 } | |
100 } | |
101 | |
102 static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | |
103 { | |
104 const DCTELEM *p; | |
105 UINT8 *pix; | |
106 int i; | |
107 | |
108 /* read the pixels */ | |
109 p = block; | |
110 pix = pixels; | |
111 for(i=0;i<2;i++) { | |
112 __asm __volatile( | |
113 "movq %4, %%mm0\n\t" | |
114 "movq 8%4, %%mm1\n\t" | |
115 "movq 16%4, %%mm2\n\t" | |
116 "movq 24%4, %%mm3\n\t" | |
117 "movq 32%4, %%mm4\n\t" | |
118 "movq 40%4, %%mm5\n\t" | |
119 "movq 48%4, %%mm6\n\t" | |
120 "movq 56%4, %%mm7\n\t" | |
121 "packuswb %%mm1, %%mm0\n\t" | |
122 "packuswb %%mm3, %%mm2\n\t" | |
123 "packuswb %%mm5, %%mm4\n\t" | |
124 "packuswb %%mm7, %%mm6\n\t" | |
125 "movq %%mm0, %0\n\t" | |
126 "movq %%mm2, %1\n\t" | |
127 "movq %%mm4, %2\n\t" | |
128 "movq %%mm6, %3\n\t" | |
129 :"=m"(*pix), "=m"(*(pix+line_size)) | |
130 ,"=m"(*(pix+line_size*2)), "=m"(*(pix+line_size*3)) | |
131 :"m"(*p) | |
132 :"memory"); | |
133 pix += line_size*4; | |
134 p += 32; | |
135 } | |
136 } | |
137 | |
138 static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | |
139 { | |
140 const DCTELEM *p; | |
141 UINT8 *pix; | |
142 int i; | |
143 | |
144 /* read the pixels */ | |
145 p = block; | |
146 pix = pixels; | |
147 __asm __volatile("pxor %%mm7, %%mm7":::"memory"); | |
148 for(i=0;i<4;i++) { | |
149 __asm __volatile( | |
150 "movq %2, %%mm0\n\t" | |
151 "movq 8%2, %%mm1\n\t" | |
152 "movq 16%2, %%mm2\n\t" | |
153 "movq 24%2, %%mm3\n\t" | |
154 "movq %0, %%mm4\n\t" | |
155 "movq %1, %%mm6\n\t" | |
156 "movq %%mm4, %%mm5\n\t" | |
157 "punpcklbw %%mm7, %%mm4\n\t" | |
158 "punpckhbw %%mm7, %%mm5\n\t" | |
159 "paddsw %%mm4, %%mm0\n\t" | |
160 "paddsw %%mm5, %%mm1\n\t" | |
161 "movq %%mm6, %%mm5\n\t" | |
162 "punpcklbw %%mm7, %%mm6\n\t" | |
163 "punpckhbw %%mm7, %%mm5\n\t" | |
164 "paddsw %%mm6, %%mm2\n\t" | |
165 "paddsw %%mm5, %%mm3\n\t" | |
166 "packuswb %%mm1, %%mm0\n\t" | |
167 "packuswb %%mm3, %%mm2\n\t" | |
168 "movq %%mm0, %0\n\t" | |
169 "movq %%mm2, %1\n\t" | |
170 :"=m"(*pix), "=m"(*(pix+line_size)) | |
171 :"m"(*p) | |
172 :"memory"); | |
173 pix += line_size*2; | |
174 p += 16; | |
175 } | |
176 } | |
177 | |
178 static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
179 { | |
180 int dh, hh; | |
181 UINT8 *p; | |
182 const UINT8 *pix; | |
183 p = block; | |
184 pix = pixels; | |
185 hh=h>>2; | |
186 dh=h&3; | |
187 while(hh--) { | |
188 __asm __volatile( | |
189 "movq %4, %%mm0\n\t" | |
190 "movq %5, %%mm1\n\t" | |
191 "movq %6, %%mm2\n\t" | |
192 "movq %7, %%mm3\n\t" | |
193 "movq %%mm0, %0\n\t" | |
194 "movq %%mm1, %1\n\t" | |
195 "movq %%mm2, %2\n\t" | |
196 "movq %%mm3, %3\n\t" | |
197 :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3)) | |
198 :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3)) | |
199 :"memory"); | |
200 pix = pix + line_size*4; | |
201 p = p + line_size*4; | |
202 } | |
203 while(dh--) { | |
204 __asm __volatile( | |
205 "movq %1, %%mm0\n\t" | |
206 "movq %%mm0, %0\n\t" | |
207 :"=m"(*p) | |
208 :"m"(*pix) | |
209 :"memory"); | |
210 pix = pix + line_size; | |
211 p = p + line_size; | |
212 } | |
213 } | |
214 | |
215 static void put_pixels_x2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
216 { | |
217 UINT8 *p; | |
218 const UINT8 *pix; | |
219 p = block; | |
220 pix = pixels; | |
221 __asm __volatile( | |
222 "pxor %%mm7, %%mm7\n\t" | |
223 "movq %0, %%mm4\n\t" | |
8 | 224 ::"m"(mm_wone):"memory"); |
0 | 225 do { |
226 __asm __volatile( | |
227 "movq %1, %%mm0\n\t" | |
228 "movq 1%1, %%mm1\n\t" | |
229 "movq %%mm0, %%mm2\n\t" | |
230 "movq %%mm1, %%mm3\n\t" | |
231 "punpcklbw %%mm7, %%mm0\n\t" | |
232 "punpcklbw %%mm7, %%mm1\n\t" | |
233 "punpckhbw %%mm7, %%mm2\n\t" | |
234 "punpckhbw %%mm7, %%mm3\n\t" | |
235 "paddusw %%mm1, %%mm0\n\t" | |
236 "paddusw %%mm3, %%mm2\n\t" | |
237 "paddusw %%mm4, %%mm0\n\t" | |
238 "paddusw %%mm4, %%mm2\n\t" | |
239 "psrlw $1, %%mm0\n\t" | |
240 "psrlw $1, %%mm2\n\t" | |
241 "packuswb %%mm2, %%mm0\n\t" | |
242 "movq %%mm0, %0\n\t" | |
243 :"=m"(*p) | |
244 :"m"(*pix) | |
245 :"memory"); | |
246 pix += line_size; p += line_size; | |
247 } while (--h); | |
248 } | |
249 | |
250 static void put_pixels_y2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
251 { | |
252 UINT8 *p; | |
253 const UINT8 *pix; | |
254 p = block; | |
255 pix = pixels; | |
256 __asm __volatile( | |
257 "pxor %%mm7, %%mm7\n\t" | |
258 "movq %0, %%mm4\n\t" | |
8 | 259 ::"m"(mm_wone):"memory"); |
0 | 260 do { |
261 __asm __volatile( | |
262 "movq %1, %%mm0\n\t" | |
263 "movq %2, %%mm1\n\t" | |
264 "movq %%mm0, %%mm2\n\t" | |
265 "movq %%mm1, %%mm3\n\t" | |
266 "punpcklbw %%mm7, %%mm0\n\t" | |
267 "punpcklbw %%mm7, %%mm1\n\t" | |
268 "punpckhbw %%mm7, %%mm2\n\t" | |
269 "punpckhbw %%mm7, %%mm3\n\t" | |
270 "paddusw %%mm1, %%mm0\n\t" | |
271 "paddusw %%mm3, %%mm2\n\t" | |
272 "paddusw %%mm4, %%mm0\n\t" | |
273 "paddusw %%mm4, %%mm2\n\t" | |
274 "psrlw $1, %%mm0\n\t" | |
275 "psrlw $1, %%mm2\n\t" | |
276 "packuswb %%mm2, %%mm0\n\t" | |
277 "movq %%mm0, %0\n\t" | |
278 :"=m"(*p) | |
279 :"m"(*pix), | |
280 "m"(*(pix+line_size)) | |
281 :"memory"); | |
282 pix += line_size; | |
283 p += line_size; | |
284 } while (--h); | |
285 } | |
286 | |
287 static void put_pixels_xy2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
288 { | |
289 UINT8 *p; | |
290 const UINT8 *pix; | |
291 p = block; | |
292 pix = pixels; | |
293 __asm __volatile( | |
294 "pxor %%mm7, %%mm7\n\t" | |
295 "movq %0, %%mm6\n\t" | |
8 | 296 ::"m"(mm_wtwo):"memory"); |
0 | 297 do { |
298 __asm __volatile( | |
299 "movq %1, %%mm0\n\t" | |
300 "movq %2, %%mm1\n\t" | |
301 "movq 1%1, %%mm4\n\t" | |
302 "movq 1%2, %%mm5\n\t" | |
303 "movq %%mm0, %%mm2\n\t" | |
304 "movq %%mm1, %%mm3\n\t" | |
305 "punpcklbw %%mm7, %%mm0\n\t" | |
306 "punpcklbw %%mm7, %%mm1\n\t" | |
307 "punpckhbw %%mm7, %%mm2\n\t" | |
308 "punpckhbw %%mm7, %%mm3\n\t" | |
309 "paddusw %%mm1, %%mm0\n\t" | |
310 "paddusw %%mm3, %%mm2\n\t" | |
311 "movq %%mm4, %%mm1\n\t" | |
312 "movq %%mm5, %%mm3\n\t" | |
313 "punpcklbw %%mm7, %%mm4\n\t" | |
314 "punpcklbw %%mm7, %%mm5\n\t" | |
315 "punpckhbw %%mm7, %%mm1\n\t" | |
316 "punpckhbw %%mm7, %%mm3\n\t" | |
317 "paddusw %%mm5, %%mm4\n\t" | |
318 "paddusw %%mm3, %%mm1\n\t" | |
319 "paddusw %%mm6, %%mm4\n\t" | |
320 "paddusw %%mm6, %%mm1\n\t" | |
321 "paddusw %%mm4, %%mm0\n\t" | |
322 "paddusw %%mm1, %%mm2\n\t" | |
323 "psrlw $2, %%mm0\n\t" | |
324 "psrlw $2, %%mm2\n\t" | |
325 "packuswb %%mm2, %%mm0\n\t" | |
326 "movq %%mm0, %0\n\t" | |
327 :"=m"(*p) | |
328 :"m"(*pix), | |
329 "m"(*(pix+line_size)) | |
330 :"memory"); | |
331 pix += line_size; | |
332 p += line_size; | |
333 } while(--h); | |
334 } | |
335 | |
336 static void put_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
337 { | |
338 UINT8 *p; | |
339 const UINT8 *pix; | |
340 p = block; | |
341 pix = pixels; | |
342 __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory"); | |
343 do { | |
344 __asm __volatile( | |
345 "movq %1, %%mm0\n\t" | |
346 "movq 1%1, %%mm1\n\t" | |
347 "movq %%mm0, %%mm2\n\t" | |
348 "movq %%mm1, %%mm3\n\t" | |
349 "punpcklbw %%mm7, %%mm0\n\t" | |
350 "punpcklbw %%mm7, %%mm1\n\t" | |
351 "punpckhbw %%mm7, %%mm2\n\t" | |
352 "punpckhbw %%mm7, %%mm3\n\t" | |
353 "paddusw %%mm1, %%mm0\n\t" | |
354 "paddusw %%mm3, %%mm2\n\t" | |
355 "psrlw $1, %%mm0\n\t" | |
356 "psrlw $1, %%mm2\n\t" | |
357 "packuswb %%mm2, %%mm0\n\t" | |
358 "movq %%mm0, %0\n\t" | |
359 :"=m"(*p) | |
360 :"m"(*pix) | |
361 :"memory"); | |
362 pix += line_size; | |
363 p += line_size; | |
364 } while (--h); | |
365 } | |
366 | |
367 static void put_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
368 { | |
369 UINT8 *p; | |
370 const UINT8 *pix; | |
371 p = block; | |
372 pix = pixels; | |
373 __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory"); | |
374 do { | |
375 __asm __volatile( | |
376 "movq %1, %%mm0\n\t" | |
377 "movq %2, %%mm1\n\t" | |
378 "movq %%mm0, %%mm2\n\t" | |
379 "movq %%mm1, %%mm3\n\t" | |
380 "punpcklbw %%mm7, %%mm0\n\t" | |
381 "punpcklbw %%mm7, %%mm1\n\t" | |
382 "punpckhbw %%mm7, %%mm2\n\t" | |
383 "punpckhbw %%mm7, %%mm3\n\t" | |
384 "paddusw %%mm1, %%mm0\n\t" | |
385 "paddusw %%mm3, %%mm2\n\t" | |
386 "psrlw $1, %%mm0\n\t" | |
387 "psrlw $1, %%mm2\n\t" | |
388 "packuswb %%mm2, %%mm0\n\t" | |
389 "movq %%mm0, %0\n\t" | |
390 :"=m"(*p) | |
391 :"m"(*pix), | |
392 "m"(*(pix+line_size)) | |
393 :"memory"); | |
394 pix += line_size; | |
395 p += line_size; | |
396 } while(--h); | |
397 } | |
398 | |
399 static void put_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
400 { | |
401 UINT8 *p; | |
402 const UINT8 *pix; | |
403 p = block; | |
404 pix = pixels; | |
405 __asm __volatile( | |
406 "pxor %%mm7, %%mm7\n\t" | |
407 "movq %0, %%mm6\n\t" | |
8 | 408 ::"m"(mm_wone):"memory"); |
0 | 409 do { |
410 __asm __volatile( | |
411 "movq %1, %%mm0\n\t" | |
412 "movq %2, %%mm1\n\t" | |
413 "movq 1%1, %%mm4\n\t" | |
414 "movq 1%2, %%mm5\n\t" | |
415 "movq %%mm0, %%mm2\n\t" | |
416 "movq %%mm1, %%mm3\n\t" | |
417 "punpcklbw %%mm7, %%mm0\n\t" | |
418 "punpcklbw %%mm7, %%mm1\n\t" | |
419 "punpckhbw %%mm7, %%mm2\n\t" | |
420 "punpckhbw %%mm7, %%mm3\n\t" | |
421 "paddusw %%mm1, %%mm0\n\t" | |
422 "paddusw %%mm3, %%mm2\n\t" | |
423 "movq %%mm4, %%mm1\n\t" | |
424 "movq %%mm5, %%mm3\n\t" | |
425 "punpcklbw %%mm7, %%mm4\n\t" | |
426 "punpcklbw %%mm7, %%mm5\n\t" | |
427 "punpckhbw %%mm7, %%mm1\n\t" | |
428 "punpckhbw %%mm7, %%mm3\n\t" | |
429 "paddusw %%mm5, %%mm4\n\t" | |
430 "paddusw %%mm3, %%mm1\n\t" | |
431 "paddusw %%mm6, %%mm4\n\t" | |
432 "paddusw %%mm6, %%mm1\n\t" | |
433 "paddusw %%mm4, %%mm0\n\t" | |
434 "paddusw %%mm1, %%mm2\n\t" | |
435 "psrlw $2, %%mm0\n\t" | |
436 "psrlw $2, %%mm2\n\t" | |
437 "packuswb %%mm2, %%mm0\n\t" | |
438 "movq %%mm0, %0\n\t" | |
439 :"=m"(*p) | |
440 :"m"(*pix), | |
441 "m"(*(pix+line_size)) | |
442 :"memory"); | |
443 pix += line_size; | |
444 p += line_size; | |
445 } while(--h); | |
446 } | |
447 | |
448 static void avg_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
449 { | |
450 UINT8 *p; | |
451 const UINT8 *pix; | |
452 p = block; | |
453 pix = pixels; | |
454 __asm __volatile( | |
455 "pxor %%mm7, %%mm7\n\t" | |
456 "movq %0, %%mm6\n\t" | |
8 | 457 ::"m"(mm_wone):"memory"); |
0 | 458 do { |
459 __asm __volatile( | |
460 "movq %0, %%mm0\n\t" | |
461 "movq %1, %%mm1\n\t" | |
462 "movq %%mm0, %%mm2\n\t" | |
463 "movq %%mm1, %%mm3\n\t" | |
464 "punpcklbw %%mm7, %%mm0\n\t" | |
465 "punpcklbw %%mm7, %%mm1\n\t" | |
466 "punpckhbw %%mm7, %%mm2\n\t" | |
467 "punpckhbw %%mm7, %%mm3\n\t" | |
468 "paddusw %%mm1, %%mm0\n\t" | |
469 "paddusw %%mm3, %%mm2\n\t" | |
470 "paddusw %%mm6, %%mm0\n\t" | |
471 "paddusw %%mm6, %%mm2\n\t" | |
472 "psrlw $1, %%mm0\n\t" | |
473 "psrlw $1, %%mm2\n\t" | |
474 "packuswb %%mm2, %%mm0\n\t" | |
475 "movq %%mm0, %0\n\t" | |
476 :"=m"(*p) | |
477 :"m"(*pix) | |
478 :"memory"); | |
479 pix += line_size; | |
480 p += line_size; | |
481 } | |
482 while (--h); | |
483 } | |
484 | |
485 static void avg_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
486 { | |
487 UINT8 *p; | |
488 const UINT8 *pix; | |
489 p = block; | |
490 pix = pixels; | |
491 __asm __volatile( | |
492 "pxor %%mm7, %%mm7\n\t" | |
493 "movq %0, %%mm6\n\t" | |
8 | 494 ::"m"(mm_wone):"memory"); |
0 | 495 do { |
496 __asm __volatile( | |
497 "movq %1, %%mm1\n\t" | |
498 "movq %0, %%mm0\n\t" | |
499 "movq 1%1, %%mm4\n\t" | |
500 "movq %%mm0, %%mm2\n\t" | |
501 "movq %%mm1, %%mm3\n\t" | |
502 "movq %%mm4, %%mm5\n\t" | |
503 "punpcklbw %%mm7, %%mm1\n\t" | |
504 "punpckhbw %%mm7, %%mm3\n\t" | |
505 "punpcklbw %%mm7, %%mm4\n\t" | |
506 "punpckhbw %%mm7, %%mm5\n\t" | |
507 "punpcklbw %%mm7, %%mm0\n\t" | |
508 "punpckhbw %%mm7, %%mm2\n\t" | |
509 "paddusw %%mm4, %%mm1\n\t" | |
510 "paddusw %%mm5, %%mm3\n\t" | |
511 "paddusw %%mm6, %%mm1\n\t" | |
512 "paddusw %%mm6, %%mm3\n\t" | |
513 "psrlw $1, %%mm1\n\t" | |
514 "psrlw $1, %%mm3\n\t" | |
515 "paddusw %%mm6, %%mm0\n\t" | |
516 "paddusw %%mm6, %%mm2\n\t" | |
517 "paddusw %%mm1, %%mm0\n\t" | |
518 "paddusw %%mm3, %%mm2\n\t" | |
519 "psrlw $1, %%mm0\n\t" | |
520 "psrlw $1, %%mm2\n\t" | |
521 "packuswb %%mm2, %%mm0\n\t" | |
522 "movq %%mm0, %0\n\t" | |
523 :"=m"(*p) | |
524 :"m"(*pix) | |
525 :"memory"); | |
526 pix += line_size; | |
527 p += line_size; | |
528 } while (--h); | |
529 } | |
530 | |
531 static void avg_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
532 { | |
533 UINT8 *p; | |
534 const UINT8 *pix; | |
535 p = block; | |
536 pix = pixels; | |
537 __asm __volatile( | |
538 "pxor %%mm7, %%mm7\n\t" | |
539 "movq %0, %%mm6\n\t" | |
8 | 540 ::"m"(mm_wone):"memory"); |
0 | 541 do { |
542 __asm __volatile( | |
543 "movq %1, %%mm1\n\t" | |
544 "movq %0, %%mm0\n\t" | |
545 "movq %2, %%mm4\n\t" | |
546 "movq %%mm0, %%mm2\n\t" | |
547 "movq %%mm1, %%mm3\n\t" | |
548 "movq %%mm4, %%mm5\n\t" | |
549 "punpcklbw %%mm7, %%mm1\n\t" | |
550 "punpckhbw %%mm7, %%mm3\n\t" | |
551 "punpcklbw %%mm7, %%mm4\n\t" | |
552 "punpckhbw %%mm7, %%mm5\n\t" | |
553 "punpcklbw %%mm7, %%mm0\n\t" | |
554 "punpckhbw %%mm7, %%mm2\n\t" | |
555 "paddusw %%mm4, %%mm1\n\t" | |
556 "paddusw %%mm5, %%mm3\n\t" | |
557 "paddusw %%mm6, %%mm1\n\t" | |
558 "paddusw %%mm6, %%mm3\n\t" | |
559 "psrlw $1, %%mm1\n\t" | |
560 "psrlw $1, %%mm3\n\t" | |
561 "paddusw %%mm6, %%mm0\n\t" | |
562 "paddusw %%mm6, %%mm2\n\t" | |
563 "paddusw %%mm1, %%mm0\n\t" | |
564 "paddusw %%mm3, %%mm2\n\t" | |
565 "psrlw $1, %%mm0\n\t" | |
566 "psrlw $1, %%mm2\n\t" | |
567 "packuswb %%mm2, %%mm0\n\t" | |
568 "movq %%mm0, %0\n\t" | |
569 :"=m"(*p) | |
570 :"m"(*pix), "m"(*(pix+line_size)) | |
571 :"memory"); | |
572 pix += line_size; | |
573 p += line_size ; | |
574 } while(--h); | |
575 } | |
576 | |
577 static void avg_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
578 { | |
579 UINT8 *p; | |
580 const UINT8 *pix; | |
581 p = block; | |
582 pix = pixels; | |
583 __asm __volatile( | |
584 "pxor %%mm7, %%mm7\n\t" | |
585 "movq %0, %%mm6\n\t" | |
8 | 586 ::"m"(mm_wtwo):"memory"); |
0 | 587 do { |
588 __asm __volatile( | |
589 "movq %1, %%mm0\n\t" | |
590 "movq %2, %%mm1\n\t" | |
591 "movq 1%1, %%mm4\n\t" | |
592 "movq 1%2, %%mm5\n\t" | |
593 "movq %%mm0, %%mm2\n\t" | |
594 "movq %%mm1, %%mm3\n\t" | |
595 "punpcklbw %%mm7, %%mm0\n\t" | |
596 "punpcklbw %%mm7, %%mm1\n\t" | |
597 "punpckhbw %%mm7, %%mm2\n\t" | |
598 "punpckhbw %%mm7, %%mm3\n\t" | |
599 "paddusw %%mm1, %%mm0\n\t" | |
600 "paddusw %%mm3, %%mm2\n\t" | |
601 "movq %%mm4, %%mm1\n\t" | |
602 "movq %%mm5, %%mm3\n\t" | |
603 "punpcklbw %%mm7, %%mm4\n\t" | |
604 "punpcklbw %%mm7, %%mm5\n\t" | |
605 "punpckhbw %%mm7, %%mm1\n\t" | |
606 "punpckhbw %%mm7, %%mm3\n\t" | |
607 "paddusw %%mm5, %%mm4\n\t" | |
608 "paddusw %%mm3, %%mm1\n\t" | |
609 "paddusw %%mm6, %%mm4\n\t" | |
610 "paddusw %%mm6, %%mm1\n\t" | |
611 "paddusw %%mm4, %%mm0\n\t" | |
612 "paddusw %%mm1, %%mm2\n\t" | |
613 "movq %3, %%mm5\n\t" | |
614 "psrlw $2, %%mm0\n\t" | |
615 "movq %0, %%mm1\n\t" | |
616 "psrlw $2, %%mm2\n\t" | |
617 "movq %%mm1, %%mm3\n\t" | |
618 "punpcklbw %%mm7, %%mm1\n\t" | |
619 "punpckhbw %%mm7, %%mm3\n\t" | |
620 "paddusw %%mm1, %%mm0\n\t" | |
621 "paddusw %%mm3, %%mm2\n\t" | |
622 "paddusw %%mm5, %%mm0\n\t" | |
623 "paddusw %%mm5, %%mm2\n\t" | |
624 "psrlw $1, %%mm0\n\t" | |
625 "psrlw $1, %%mm2\n\t" | |
626 "packuswb %%mm2, %%mm0\n\t" | |
627 "movq %%mm0, %0\n\t" | |
628 :"=m"(*p) | |
629 :"m"(*pix), | |
8 | 630 "m"(*(pix+line_size)), "m"(mm_wone) |
0 | 631 :"memory"); |
632 pix += line_size; | |
633 p += line_size ; | |
634 } while(--h); | |
635 } | |
636 | |
637 static void avg_no_rnd_pixels_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
638 { | |
639 UINT8 *p; | |
640 const UINT8 *pix; | |
641 p = block; | |
642 pix = pixels; | |
643 __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory"); | |
644 do { | |
645 __asm __volatile( | |
646 "movq %1, %%mm0\n\t" | |
647 "movq %0, %%mm1\n\t" | |
648 "movq %%mm0, %%mm2\n\t" | |
649 "movq %%mm1, %%mm3\n\t" | |
650 "punpcklbw %%mm7, %%mm0\n\t" | |
651 "punpcklbw %%mm7, %%mm1\n\t" | |
652 "punpckhbw %%mm7, %%mm2\n\t" | |
653 "punpckhbw %%mm7, %%mm3\n\t" | |
654 "paddusw %%mm1, %%mm0\n\t" | |
655 "paddusw %%mm3, %%mm2\n\t" | |
656 "psrlw $1, %%mm0\n\t" | |
657 "psrlw $1, %%mm2\n\t" | |
658 "packuswb %%mm2, %%mm0\n\t" | |
659 "movq %%mm0, %0\n\t" | |
660 :"=m"(*p) | |
661 :"m"(*pix) | |
662 :"memory"); | |
663 pix += line_size; | |
664 p += line_size ; | |
665 } while (--h); | |
666 } | |
667 | |
668 static void avg_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
669 { | |
670 UINT8 *p; | |
671 const UINT8 *pix; | |
672 p = block; | |
673 pix = pixels; | |
674 __asm __volatile( | |
675 "pxor %%mm7, %%mm7\n\t":::"memory"); | |
676 do { | |
677 __asm __volatile( | |
678 "movq %1, %%mm0\n\t" | |
679 "movq 1%1, %%mm1\n\t" | |
680 "movq %0, %%mm4\n\t" | |
681 "movq %%mm0, %%mm2\n\t" | |
682 "movq %%mm1, %%mm3\n\t" | |
683 "movq %%mm4, %%mm5\n\t" | |
684 "punpcklbw %%mm7, %%mm0\n\t" | |
685 "punpcklbw %%mm7, %%mm1\n\t" | |
686 "punpckhbw %%mm7, %%mm2\n\t" | |
687 "punpckhbw %%mm7, %%mm3\n\t" | |
688 "punpcklbw %%mm7, %%mm4\n\t" | |
689 "punpckhbw %%mm7, %%mm5\n\t" | |
690 "paddusw %%mm1, %%mm0\n\t" | |
691 "paddusw %%mm3, %%mm2\n\t" | |
692 "psrlw $1, %%mm0\n\t" | |
693 "psrlw $1, %%mm2\n\t" | |
694 "paddusw %%mm4, %%mm0\n\t" | |
695 "paddusw %%mm5, %%mm2\n\t" | |
696 "psrlw $1, %%mm0\n\t" | |
697 "psrlw $1, %%mm2\n\t" | |
698 "packuswb %%mm2, %%mm0\n\t" | |
699 "movq %%mm0, %0\n\t" | |
700 :"=m"(*p) | |
701 :"m"(*pix) | |
702 :"memory"); | |
703 pix += line_size; | |
704 p += line_size; | |
705 } while (--h); | |
706 } | |
707 | |
708 static void avg_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
709 { | |
710 UINT8 *p; | |
711 const UINT8 *pix; | |
712 p = block; | |
713 pix = pixels; | |
714 __asm __volatile( | |
715 "pxor %%mm7, %%mm7\n\t":::"memory"); | |
716 do { | |
717 __asm __volatile( | |
718 "movq %1, %%mm0\n\t" | |
719 "movq %2, %%mm1\n\t" | |
720 "movq %0, %%mm4\n\t" | |
721 "movq %%mm0, %%mm2\n\t" | |
722 "movq %%mm1, %%mm3\n\t" | |
723 "movq %%mm4, %%mm5\n\t" | |
724 "punpcklbw %%mm7, %%mm0\n\t" | |
725 "punpcklbw %%mm7, %%mm1\n\t" | |
726 "punpckhbw %%mm7, %%mm2\n\t" | |
727 "punpckhbw %%mm7, %%mm3\n\t" | |
728 "punpcklbw %%mm7, %%mm4\n\t" | |
729 "punpckhbw %%mm7, %%mm5\n\t" | |
730 "paddusw %%mm1, %%mm0\n\t" | |
731 "paddusw %%mm3, %%mm2\n\t" | |
732 "psrlw $1, %%mm0\n\t" | |
733 "psrlw $1, %%mm2\n\t" | |
734 "paddusw %%mm4, %%mm0\n\t" | |
735 "paddusw %%mm5, %%mm2\n\t" | |
736 "psrlw $1, %%mm0\n\t" | |
737 "psrlw $1, %%mm2\n\t" | |
738 "packuswb %%mm2, %%mm0\n\t" | |
739 "movq %%mm0, %0\n\t" | |
740 :"=m"(*p) | |
741 :"m"(*pix), "m"(*(pix+line_size)) | |
742 :"memory"); | |
743 pix += line_size; | |
744 p += line_size ; | |
745 } while(--h); | |
746 } | |
747 | |
748 static void avg_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
749 { | |
750 UINT8 *p; | |
751 const UINT8 *pix; | |
752 p = block; | |
753 pix = pixels; | |
754 __asm __volatile( | |
755 "pxor %%mm7, %%mm7\n\t" | |
756 "movq %0, %%mm6\n\t" | |
8 | 757 ::"m"(mm_wone):"memory"); |
0 | 758 do { |
759 __asm __volatile( | |
760 "movq %1, %%mm0\n\t" | |
761 "movq %2, %%mm1\n\t" | |
762 "movq 1%1, %%mm4\n\t" | |
763 "movq 1%2, %%mm5\n\t" | |
764 "movq %%mm0, %%mm2\n\t" | |
765 "movq %%mm1, %%mm3\n\t" | |
766 "punpcklbw %%mm7, %%mm0\n\t" | |
767 "punpcklbw %%mm7, %%mm1\n\t" | |
768 "punpckhbw %%mm7, %%mm2\n\t" | |
769 "punpckhbw %%mm7, %%mm3\n\t" | |
770 "paddusw %%mm1, %%mm0\n\t" | |
771 "paddusw %%mm3, %%mm2\n\t" | |
772 "movq %%mm4, %%mm1\n\t" | |
773 "movq %%mm5, %%mm3\n\t" | |
774 "punpcklbw %%mm7, %%mm4\n\t" | |
775 "punpcklbw %%mm7, %%mm5\n\t" | |
776 "punpckhbw %%mm7, %%mm1\n\t" | |
777 "punpckhbw %%mm7, %%mm3\n\t" | |
778 "paddusw %%mm5, %%mm4\n\t" | |
779 "paddusw %%mm3, %%mm1\n\t" | |
780 "paddusw %%mm6, %%mm4\n\t" | |
781 "paddusw %%mm6, %%mm1\n\t" | |
782 "paddusw %%mm4, %%mm0\n\t" | |
783 "paddusw %%mm1, %%mm2\n\t" | |
784 "movq %0, %%mm1\n\t" | |
785 "psrlw $2, %%mm0\n\t" | |
786 "movq %%mm1, %%mm3\n\t" | |
787 "psrlw $2, %%mm2\n\t" | |
788 "punpcklbw %%mm7, %%mm1\n\t" | |
789 "punpckhbw %%mm7, %%mm3\n\t" | |
790 "paddusw %%mm1, %%mm0\n\t" | |
791 "paddusw %%mm3, %%mm2\n\t" | |
792 "psrlw $1, %%mm0\n\t" | |
793 "psrlw $1, %%mm2\n\t" | |
794 "packuswb %%mm2, %%mm0\n\t" | |
795 "movq %%mm0, %0\n\t" | |
796 :"=m"(*p) | |
797 :"m"(*pix), | |
798 "m"(*(pix+line_size)) | |
799 :"memory"); | |
800 pix += line_size; | |
801 p += line_size; | |
802 } while(--h); | |
803 } | |
804 | |
805 static void sub_pixels_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | |
806 { | |
807 DCTELEM *p; | |
808 const UINT8 *pix; | |
809 p = block; | |
810 pix = pixels; | |
811 __asm __volatile("pxor %%mm7, %%mm7":::"memory"); | |
812 do { | |
813 __asm __volatile( | |
814 "movq %0, %%mm0\n\t" | |
815 "movq %1, %%mm2\n\t" | |
816 "movq 8%0, %%mm1\n\t" | |
817 "movq %%mm2, %%mm3\n\t" | |
818 "punpcklbw %%mm7, %%mm2\n\t" | |
819 "punpckhbw %%mm7, %%mm3\n\t" | |
820 "psubsw %%mm2, %%mm0\n\t" | |
821 "psubsw %%mm3, %%mm1\n\t" | |
822 "movq %%mm0, %0\n\t" | |
823 "movq %%mm1, 8%0\n\t" | |
824 :"=m"(*p) | |
825 :"m"(*pix) | |
826 :"memory"); | |
827 pix += line_size; | |
828 p += 8; | |
829 } while (--h); | |
830 } | |
831 | |
832 static void sub_pixels_x2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | |
833 { | |
834 DCTELEM *p; | |
835 const UINT8 *pix; | |
836 p = block; | |
837 pix = pixels; | |
838 __asm __volatile( | |
839 "pxor %%mm7, %%mm7\n\t" | |
840 "movq %0, %%mm6" | |
8 | 841 ::"m"(mm_wone):"memory"); |
0 | 842 do { |
843 __asm __volatile( | |
844 "movq %0, %%mm0\n\t" | |
845 "movq %1, %%mm2\n\t" | |
846 "movq 8%0, %%mm1\n\t" | |
847 "movq 1%1, %%mm4\n\t" | |
848 "movq %%mm2, %%mm3\n\t" | |
849 "movq %%mm4, %%mm5\n\t" | |
850 "punpcklbw %%mm7, %%mm2\n\t" | |
851 "punpckhbw %%mm7, %%mm3\n\t" | |
852 "punpcklbw %%mm7, %%mm4\n\t" | |
853 "punpckhbw %%mm7, %%mm5\n\t" | |
854 "paddusw %%mm4, %%mm2\n\t" | |
855 "paddusw %%mm5, %%mm3\n\t" | |
856 "paddusw %%mm6, %%mm2\n\t" | |
857 "paddusw %%mm6, %%mm3\n\t" | |
858 "psrlw $1, %%mm2\n\t" | |
859 "psrlw $1, %%mm3\n\t" | |
860 "psubsw %%mm2, %%mm0\n\t" | |
861 "psubsw %%mm3, %%mm1\n\t" | |
862 "movq %%mm0, %0\n\t" | |
863 "movq %%mm1, 8%0\n\t" | |
864 :"=m"(*p) | |
865 :"m"(*pix) | |
866 :"memory"); | |
867 pix += line_size; | |
868 p += 8; | |
869 } while (--h); | |
870 } | |
871 | |
872 static void sub_pixels_y2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | |
873 { | |
874 DCTELEM *p; | |
875 const UINT8 *pix; | |
876 p = block; | |
877 pix = pixels; | |
878 __asm __volatile( | |
879 "pxor %%mm7, %%mm7\n\t" | |
880 "movq %0, %%mm6" | |
8 | 881 ::"m"(mm_wone):"memory"); |
0 | 882 do { |
883 __asm __volatile( | |
884 "movq %0, %%mm0\n\t" | |
885 "movq %1, %%mm2\n\t" | |
886 "movq 8%0, %%mm1\n\t" | |
887 "movq %2, %%mm4\n\t" | |
888 "movq %%mm2, %%mm3\n\t" | |
889 "movq %%mm4, %%mm5\n\t" | |
890 "punpcklbw %%mm7, %%mm2\n\t" | |
891 "punpckhbw %%mm7, %%mm3\n\t" | |
892 "punpcklbw %%mm7, %%mm4\n\t" | |
893 "punpckhbw %%mm7, %%mm5\n\t" | |
894 "paddusw %%mm4, %%mm2\n\t" | |
895 "paddusw %%mm5, %%mm3\n\t" | |
896 "paddusw %%mm6, %%mm2\n\t" | |
897 "paddusw %%mm6, %%mm3\n\t" | |
898 "psrlw $1, %%mm2\n\t" | |
899 "psrlw $1, %%mm3\n\t" | |
900 "psubsw %%mm2, %%mm0\n\t" | |
901 "psubsw %%mm3, %%mm1\n\t" | |
902 "movq %%mm0, %0\n\t" | |
903 "movq %%mm1, 8%0\n\t" | |
904 :"=m"(*p) | |
905 :"m"(*pix), "m"(*(pix+line_size)) | |
906 :"memory"); | |
907 pix += line_size; | |
908 p += 8; | |
909 } while (--h); | |
910 } | |
911 | |
912 static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | |
913 { | |
914 DCTELEM *p; | |
915 const UINT8 *pix; | |
916 p = block; | |
917 pix = pixels; | |
918 __asm __volatile( | |
919 "pxor %%mm7, %%mm7\n\t" | |
920 "movq %0, %%mm6\n\t" | |
8 | 921 ::"m"(mm_wtwo):"memory"); |
0 | 922 do { |
923 __asm __volatile( | |
924 "movq %1, %%mm0\n\t" | |
925 "movq %2, %%mm1\n\t" | |
926 "movq 1%1, %%mm4\n\t" | |
927 "movq 1%2, %%mm5\n\t" | |
928 "movq %%mm0, %%mm2\n\t" | |
929 "movq %%mm1, %%mm3\n\t" | |
930 "punpcklbw %%mm7, %%mm0\n\t" | |
931 "punpcklbw %%mm7, %%mm1\n\t" | |
932 "punpckhbw %%mm7, %%mm2\n\t" | |
933 "punpckhbw %%mm7, %%mm3\n\t" | |
934 "paddusw %%mm1, %%mm0\n\t" | |
935 "paddusw %%mm3, %%mm2\n\t" | |
936 "movq %%mm4, %%mm1\n\t" | |
937 "movq %%mm5, %%mm3\n\t" | |
938 "punpcklbw %%mm7, %%mm4\n\t" | |
939 "punpcklbw %%mm7, %%mm5\n\t" | |
940 "punpckhbw %%mm7, %%mm1\n\t" | |
941 "punpckhbw %%mm7, %%mm3\n\t" | |
942 "paddusw %%mm5, %%mm4\n\t" | |
943 "paddusw %%mm3, %%mm1\n\t" | |
944 "paddusw %%mm6, %%mm4\n\t" | |
945 "paddusw %%mm6, %%mm1\n\t" | |
946 "paddusw %%mm4, %%mm0\n\t" | |
947 "paddusw %%mm1, %%mm2\n\t" | |
948 "movq %0, %%mm1\n\t" | |
949 "movq 8%0, %%mm3\n\t" | |
950 "psrlw $2, %%mm0\n\t" | |
951 "psrlw $2, %%mm2\n\t" | |
952 "psubsw %%mm0, %%mm1\n\t" | |
953 "psubsw %%mm2, %%mm3\n\t" | |
954 "movq %%mm1, %0\n\t" | |
955 "movq %%mm3, 8%0\n\t" | |
956 :"=m"(*p) | |
957 :"m"(*pix), | |
958 "m"(*(pix+line_size)) | |
959 :"memory"); | |
960 pix += line_size; | |
961 p += 8 ; | |
962 } while(--h); | |
963 } | |
964 | |
965 void dsputil_init_mmx(void) | |
966 { | |
967 mm_flags = mm_support(); | |
968 #if 0 | |
969 printf("CPU flags:"); | |
970 if (mm_flags & MM_MMX) | |
971 printf(" mmx"); | |
972 if (mm_flags & MM_MMXEXT) | |
973 printf(" mmxext"); | |
974 if (mm_flags & MM_3DNOW) | |
975 printf(" 3dnow"); | |
976 if (mm_flags & MM_SSE) | |
977 printf(" sse"); | |
978 if (mm_flags & MM_SSE2) | |
979 printf(" sse2"); | |
980 printf("\n"); | |
981 #endif | |
982 | |
983 if (mm_flags & MM_MMX) { | |
984 get_pixels = get_pixels_mmx; | |
985 put_pixels_clamped = put_pixels_clamped_mmx; | |
986 add_pixels_clamped = add_pixels_clamped_mmx; | |
987 | |
988 pix_abs16x16 = pix_abs16x16_mmx; | |
989 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | |
990 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | |
991 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; | |
992 av_fdct = fdct_mmx; | |
993 | |
994 put_pixels_tab[0] = put_pixels_mmx; | |
995 put_pixels_tab[1] = put_pixels_x2_mmx; | |
996 put_pixels_tab[2] = put_pixels_y2_mmx; | |
997 put_pixels_tab[3] = put_pixels_xy2_mmx; | |
998 | |
999 put_no_rnd_pixels_tab[0] = put_pixels_mmx; | |
1000 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | |
1001 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | |
1002 put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx; | |
1003 | |
1004 avg_pixels_tab[0] = avg_pixels_mmx; | |
1005 avg_pixels_tab[1] = avg_pixels_x2_mmx; | |
1006 avg_pixels_tab[2] = avg_pixels_y2_mmx; | |
1007 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | |
1008 | |
1009 avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_mmx; | |
1010 avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_mmx; | |
1011 avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_mmx; | |
1012 avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_mmx; | |
1013 | |
1014 sub_pixels_tab[0] = sub_pixels_mmx; | |
1015 sub_pixels_tab[1] = sub_pixels_x2_mmx; | |
1016 sub_pixels_tab[2] = sub_pixels_y2_mmx; | |
1017 sub_pixels_tab[3] = sub_pixels_xy2_mmx; | |
1018 | |
1019 if (mm_flags & MM_MMXEXT) { | |
1020 pix_abs16x16 = pix_abs16x16_sse; | |
1021 } | |
1022 | |
1023 if (mm_flags & MM_SSE) { | |
1024 put_pixels_tab[1] = put_pixels_x2_sse; | |
1025 put_pixels_tab[2] = put_pixels_y2_sse; | |
1026 | |
1027 avg_pixels_tab[0] = avg_pixels_sse; | |
1028 avg_pixels_tab[1] = avg_pixels_x2_sse; | |
1029 avg_pixels_tab[2] = avg_pixels_y2_sse; | |
1030 avg_pixels_tab[3] = avg_pixels_xy2_sse; | |
1031 | |
1032 sub_pixels_tab[1] = sub_pixels_x2_sse; | |
1033 sub_pixels_tab[2] = sub_pixels_y2_sse; | |
1034 } else if (mm_flags & MM_3DNOW) { | |
1035 put_pixels_tab[1] = put_pixels_x2_3dnow; | |
1036 put_pixels_tab[2] = put_pixels_y2_3dnow; | |
1037 | |
1038 avg_pixels_tab[0] = avg_pixels_3dnow; | |
1039 avg_pixels_tab[1] = avg_pixels_x2_3dnow; | |
1040 avg_pixels_tab[2] = avg_pixels_y2_3dnow; | |
1041 avg_pixels_tab[3] = avg_pixels_xy2_3dnow; | |
1042 | |
1043 sub_pixels_tab[1] = sub_pixels_x2_3dnow; | |
1044 sub_pixels_tab[2] = sub_pixels_y2_3dnow; | |
1045 } | |
19
82d4c9be9873
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
arpi_esp
parents:
8
diff
changeset
|
1046 |
42 | 1047 /* idct */ |
1048 if (mm_flags & MM_MMXEXT) { | |
1049 ff_idct = ff_mmxext_idct; | |
1050 } else { | |
1051 ff_idct = ff_mmx_idct; | |
1052 } | |
0 | 1053 } |
1054 } |