Mercurial > mplayer.hg
comparison postproc/swscale_template.c @ 18104:7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
author | nplourde |
---|---|
date | Sat, 15 Apr 2006 20:46:54 +0000 |
parents | fbf94ea858f1 |
children | b10d4b3cb9ec |
comparison (legend: equal | deleted | inserted | replaced)
18103:26ea12332e67 | 18104:7b408d60de9e |
---|---|
13 | 13 |
14 You should have received a copy of the GNU General Public License | 14 You should have received a copy of the GNU General Public License |
15 along with this program; if not, write to the Free Software | 15 along with this program; if not, write to the Free Software |
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 */ | 17 */ |
18 | |
19 #include "asmalign.h" | |
18 | 20 |
19 #undef REAL_MOVNTQ | 21 #undef REAL_MOVNTQ |
20 #undef MOVNTQ | 22 #undef MOVNTQ |
21 #undef PAVGB | 23 #undef PAVGB |
22 #undef PREFETCH | 24 #undef PREFETCH |
69 "xor %%"REG_a", %%"REG_a" \n\t"\ | 71 "xor %%"REG_a", %%"REG_a" \n\t"\ |
70 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ | 72 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ |
71 "movq %%mm3, %%mm4 \n\t"\ | 73 "movq %%mm3, %%mm4 \n\t"\ |
72 "lea " offset "(%0), %%"REG_d" \n\t"\ | 74 "lea " offset "(%0), %%"REG_d" \n\t"\ |
73 "mov (%%"REG_d"), %%"REG_S" \n\t"\ | 75 "mov (%%"REG_d"), %%"REG_S" \n\t"\ |
74 ".balign 16 \n\t" /* FIXME Unroll? */\ | 76 ASMALIGN16 /* FIXME Unroll? */\ |
75 "1: \n\t"\ | 77 "1: \n\t"\ |
76 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ | 78 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ |
77 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\ | 79 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\ |
78 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\ | 80 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\ |
79 "add $16, %%"REG_d" \n\t"\ | 81 "add $16, %%"REG_d" \n\t"\ |
96 "mov (%%"REG_d"), %%"REG_S" \n\t"\ | 98 "mov (%%"REG_d"), %%"REG_S" \n\t"\ |
97 "jb 1b \n\t" | 99 "jb 1b \n\t" |
98 | 100 |
99 #define YSCALEYUV2YV121 \ | 101 #define YSCALEYUV2YV121 \ |
100 "mov %2, %%"REG_a" \n\t"\ | 102 "mov %2, %%"REG_a" \n\t"\ |
101 ".balign 16 \n\t" /* FIXME Unroll? */\ | 103 ASMALIGN16 /* FIXME Unroll? */\ |
102 "1: \n\t"\ | 104 "1: \n\t"\ |
103 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\ | 105 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\ |
104 "movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\ | 106 "movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\ |
105 "psraw $7, %%mm0 \n\t"\ | 107 "psraw $7, %%mm0 \n\t"\ |
106 "psraw $7, %%mm1 \n\t"\ | 108 "psraw $7, %%mm1 \n\t"\ |
116 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | 118 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) |
117 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | 119 : "%eax", "%ebx", "%ecx", "%edx", "%esi" |
118 */ | 120 */ |
119 #define YSCALEYUV2PACKEDX \ | 121 #define YSCALEYUV2PACKEDX \ |
120 "xor %%"REG_a", %%"REG_a" \n\t"\ | 122 "xor %%"REG_a", %%"REG_a" \n\t"\ |
121 ".balign 16 \n\t"\ | 123 ASMALIGN16\ |
122 "nop \n\t"\ | 124 "nop \n\t"\ |
123 "1: \n\t"\ | 125 "1: \n\t"\ |
124 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\ | 126 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\ |
125 "mov (%%"REG_d"), %%"REG_S" \n\t"\ | 127 "mov (%%"REG_d"), %%"REG_S" \n\t"\ |
126 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ | 128 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ |
127 "movq %%mm3, %%mm4 \n\t"\ | 129 "movq %%mm3, %%mm4 \n\t"\ |
128 ".balign 16 \n\t"\ | 130 ASMALIGN16\ |
129 "2: \n\t"\ | 131 "2: \n\t"\ |
130 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ | 132 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ |
131 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\ | 133 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\ |
132 "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\ | 134 "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\ |
133 "add $16, %%"REG_d" \n\t"\ | 135 "add $16, %%"REG_d" \n\t"\ |
141 \ | 143 \ |
142 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\ | 144 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\ |
143 "mov (%%"REG_d"), %%"REG_S" \n\t"\ | 145 "mov (%%"REG_d"), %%"REG_S" \n\t"\ |
144 "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\ | 146 "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\ |
145 "movq %%mm1, %%mm7 \n\t"\ | 147 "movq %%mm1, %%mm7 \n\t"\ |
146 ".balign 16 \n\t"\ | 148 ASMALIGN16\ |
147 "2: \n\t"\ | 149 "2: \n\t"\ |
148 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ | 150 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ |
149 "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\ | 151 "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\ |
150 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\ | 152 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\ |
151 "add $16, %%"REG_d" \n\t"\ | 153 "add $16, %%"REG_d" \n\t"\ |
203 "punpcklwd %%mm6, %%mm6 \n\t"\ | 205 "punpcklwd %%mm6, %%mm6 \n\t"\ |
204 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ | 206 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ |
205 "punpcklwd %%mm5, %%mm5 \n\t"\ | 207 "punpcklwd %%mm5, %%mm5 \n\t"\ |
206 "punpcklwd %%mm5, %%mm5 \n\t"\ | 208 "punpcklwd %%mm5, %%mm5 \n\t"\ |
207 "xor %%"REG_a", %%"REG_a" \n\t"\ | 209 "xor %%"REG_a", %%"REG_a" \n\t"\ |
208 ".balign 16 \n\t"\ | 210 ASMALIGN16\ |
209 "1: \n\t"\ | 211 "1: \n\t"\ |
210 "movq (%0, %%"REG_a", 2), %%mm0 \n\t" /*buf0[eax]*/\ | 212 "movq (%0, %%"REG_a", 2), %%mm0 \n\t" /*buf0[eax]*/\ |
211 "movq (%1, %%"REG_a", 2), %%mm1 \n\t" /*buf1[eax]*/\ | 213 "movq (%1, %%"REG_a", 2), %%mm1 \n\t" /*buf1[eax]*/\ |
212 "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\ | 214 "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
213 "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\ | 215 "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
256 "psraw $3, %%mm0 \n\t"\ | 258 "psraw $3, %%mm0 \n\t"\ |
257 "psraw $3, %%mm1 \n\t"\ | 259 "psraw $3, %%mm1 \n\t"\ |
258 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\ | 260 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\ |
259 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\ | 261 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\ |
260 "xor "#index", "#index" \n\t"\ | 262 "xor "#index", "#index" \n\t"\ |
261 ".balign 16 \n\t"\ | 263 ASMALIGN16\ |
262 "1: \n\t"\ | 264 "1: \n\t"\ |
263 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ | 265 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
264 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | 266 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
265 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | 267 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
266 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | 268 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
288 | 290 |
289 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) | 291 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) |
290 | 292 |
291 #define REAL_YSCALEYUV2RGB(index, c) \ | 293 #define REAL_YSCALEYUV2RGB(index, c) \ |
292 "xor "#index", "#index" \n\t"\ | 294 "xor "#index", "#index" \n\t"\ |
293 ".balign 16 \n\t"\ | 295 ASMALIGN16\ |
294 "1: \n\t"\ | 296 "1: \n\t"\ |
295 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ | 297 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
296 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | 298 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
297 "movq 4096(%2, "#index"), %%mm5\n\t" /* uvbuf0[eax+2048]*/\ | 299 "movq 4096(%2, "#index"), %%mm5\n\t" /* uvbuf0[eax+2048]*/\ |
298 "movq 4096(%3, "#index"), %%mm4\n\t" /* uvbuf1[eax+2048]*/\ | 300 "movq 4096(%3, "#index"), %%mm4\n\t" /* uvbuf1[eax+2048]*/\ |
354 "pxor %%mm7, %%mm7 \n\t" | 356 "pxor %%mm7, %%mm7 \n\t" |
355 #define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c) | 357 #define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c) |
356 | 358 |
357 #define REAL_YSCALEYUV2PACKED1(index, c) \ | 359 #define REAL_YSCALEYUV2PACKED1(index, c) \ |
358 "xor "#index", "#index" \n\t"\ | 360 "xor "#index", "#index" \n\t"\ |
359 ".balign 16 \n\t"\ | 361 ASMALIGN16\ |
360 "1: \n\t"\ | 362 "1: \n\t"\ |
361 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ | 363 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ |
362 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | 364 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
363 "psraw $7, %%mm3 \n\t" \ | 365 "psraw $7, %%mm3 \n\t" \ |
364 "psraw $7, %%mm4 \n\t" \ | 366 "psraw $7, %%mm4 \n\t" \ |
369 | 371 |
370 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c) | 372 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c) |
371 | 373 |
372 #define REAL_YSCALEYUV2RGB1(index, c) \ | 374 #define REAL_YSCALEYUV2RGB1(index, c) \ |
373 "xor "#index", "#index" \n\t"\ | 375 "xor "#index", "#index" \n\t"\ |
374 ".balign 16 \n\t"\ | 376 ASMALIGN16\ |
375 "1: \n\t"\ | 377 "1: \n\t"\ |
376 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ | 378 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ |
377 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | 379 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
378 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | 380 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
379 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | 381 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
418 "pxor %%mm7, %%mm7 \n\t" | 420 "pxor %%mm7, %%mm7 \n\t" |
419 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c) | 421 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c) |
420 | 422 |
421 #define REAL_YSCALEYUV2PACKED1b(index, c) \ | 423 #define REAL_YSCALEYUV2PACKED1b(index, c) \ |
422 "xor "#index", "#index" \n\t"\ | 424 "xor "#index", "#index" \n\t"\ |
423 ".balign 16 \n\t"\ | 425 ASMALIGN16\ |
424 "1: \n\t"\ | 426 "1: \n\t"\ |
425 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ | 427 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
426 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | 428 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
427 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | 429 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
428 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | 430 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
437 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) | 439 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) |
438 | 440 |
439 // do vertical chrominance interpolation | 441 // do vertical chrominance interpolation |
440 #define REAL_YSCALEYUV2RGB1b(index, c) \ | 442 #define REAL_YSCALEYUV2RGB1b(index, c) \ |
441 "xor "#index", "#index" \n\t"\ | 443 "xor "#index", "#index" \n\t"\ |
442 ".balign 16 \n\t"\ | 444 ASMALIGN16\ |
443 "1: \n\t"\ | 445 "1: \n\t"\ |
444 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ | 446 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
445 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | 447 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
446 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | 448 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
447 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | 449 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
1660 "mov %2, %%"REG_a" \n\t" | 1662 "mov %2, %%"REG_a" \n\t" |
1661 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" | 1663 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" |
1662 "movq "MANGLE(w1111)", %%mm5 \n\t" | 1664 "movq "MANGLE(w1111)", %%mm5 \n\t" |
1663 "pxor %%mm7, %%mm7 \n\t" | 1665 "pxor %%mm7, %%mm7 \n\t" |
1664 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" | 1666 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" |
1665 ".balign 16 \n\t" | 1667 ASMALIGN16 |
1666 "1: \n\t" | 1668 "1: \n\t" |
1667 PREFETCH" 64(%0, %%"REG_b") \n\t" | 1669 PREFETCH" 64(%0, %%"REG_b") \n\t" |
1668 "movd (%0, %%"REG_b"), %%mm0 \n\t" | 1670 "movd (%0, %%"REG_b"), %%mm0 \n\t" |
1669 "movd 3(%0, %%"REG_b"), %%mm1 \n\t" | 1671 "movd 3(%0, %%"REG_b"), %%mm1 \n\t" |
1670 "punpcklbw %%mm7, %%mm0 \n\t" | 1672 "punpcklbw %%mm7, %%mm0 \n\t" |
1746 "movq "MANGLE(w1111)", %%mm5 \n\t" | 1748 "movq "MANGLE(w1111)", %%mm5 \n\t" |
1747 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | 1749 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" |
1748 "pxor %%mm7, %%mm7 \n\t" | 1750 "pxor %%mm7, %%mm7 \n\t" |
1749 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b" \n\t" | 1751 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b" \n\t" |
1750 "add %%"REG_b", %%"REG_b" \n\t" | 1752 "add %%"REG_b", %%"REG_b" \n\t" |
1751 ".balign 16 \n\t" | 1753 ASMALIGN16 |
1752 "1: \n\t" | 1754 "1: \n\t" |
1753 PREFETCH" 64(%0, %%"REG_b") \n\t" | 1755 PREFETCH" 64(%0, %%"REG_b") \n\t" |
1754 PREFETCH" 64(%1, %%"REG_b") \n\t" | 1756 PREFETCH" 64(%1, %%"REG_b") \n\t" |
1755 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1757 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
1756 "movq (%0, %%"REG_b"), %%mm0 \n\t" | 1758 "movq (%0, %%"REG_b"), %%mm0 \n\t" |
2053 asm volatile( | 2055 asm volatile( |
2054 "pxor %%mm7, %%mm7 \n\t" | 2056 "pxor %%mm7, %%mm7 \n\t" |
2055 "movq "MANGLE(w02)", %%mm6 \n\t" | 2057 "movq "MANGLE(w02)", %%mm6 \n\t" |
2056 "push %%"REG_BP" \n\t" // we use 7 regs here ... | 2058 "push %%"REG_BP" \n\t" // we use 7 regs here ... |
2057 "mov %%"REG_a", %%"REG_BP" \n\t" | 2059 "mov %%"REG_a", %%"REG_BP" \n\t" |
2058 ".balign 16 \n\t" | 2060 ASMALIGN16 |
2059 "1: \n\t" | 2061 "1: \n\t" |
2060 "movzwl (%2, %%"REG_BP"), %%eax \n\t" | 2062 "movzwl (%2, %%"REG_BP"), %%eax \n\t" |
2061 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t" | 2063 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t" |
2062 "movq (%1, %%"REG_BP", 4), %%mm1\n\t" | 2064 "movq (%1, %%"REG_BP", 4), %%mm1\n\t" |
2063 "movq 8(%1, %%"REG_BP", 4), %%mm3\n\t" | 2065 "movq 8(%1, %%"REG_BP", 4), %%mm3\n\t" |
2091 asm volatile( | 2093 asm volatile( |
2092 "pxor %%mm7, %%mm7 \n\t" | 2094 "pxor %%mm7, %%mm7 \n\t" |
2093 "movq "MANGLE(w02)", %%mm6 \n\t" | 2095 "movq "MANGLE(w02)", %%mm6 \n\t" |
2094 "push %%"REG_BP" \n\t" // we use 7 regs here ... | 2096 "push %%"REG_BP" \n\t" // we use 7 regs here ... |
2095 "mov %%"REG_a", %%"REG_BP" \n\t" | 2097 "mov %%"REG_a", %%"REG_BP" \n\t" |
2096 ".balign 16 \n\t" | 2098 ASMALIGN16 |
2097 "1: \n\t" | 2099 "1: \n\t" |
2098 "movzwl (%2, %%"REG_BP"), %%eax \n\t" | 2100 "movzwl (%2, %%"REG_BP"), %%eax \n\t" |
2099 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t" | 2101 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t" |
2100 "movq (%1, %%"REG_BP", 8), %%mm1\n\t" | 2102 "movq (%1, %%"REG_BP", 8), %%mm1\n\t" |
2101 "movq 16(%1, %%"REG_BP", 8), %%mm3\n\t" | 2103 "movq 16(%1, %%"REG_BP", 8), %%mm3\n\t" |
2140 filterPos-= counter/2; | 2142 filterPos-= counter/2; |
2141 dst-= counter/2; | 2143 dst-= counter/2; |
2142 asm volatile( | 2144 asm volatile( |
2143 "pxor %%mm7, %%mm7 \n\t" | 2145 "pxor %%mm7, %%mm7 \n\t" |
2144 "movq "MANGLE(w02)", %%mm6 \n\t" | 2146 "movq "MANGLE(w02)", %%mm6 \n\t" |
2145 ".balign 16 \n\t" | 2147 ASMALIGN16 |
2146 "1: \n\t" | 2148 "1: \n\t" |
2147 "mov %2, %%"REG_c" \n\t" | 2149 "mov %2, %%"REG_c" \n\t" |
2148 "movzwl (%%"REG_c", %0), %%eax \n\t" | 2150 "movzwl (%%"REG_c", %0), %%eax \n\t" |
2149 "movzwl 2(%%"REG_c", %0), %%ebx \n\t" | 2151 "movzwl 2(%%"REG_c", %0), %%ebx \n\t" |
2150 "mov %5, %%"REG_c" \n\t" | 2152 "mov %5, %%"REG_c" \n\t" |
2324 //NO MMX just normal asm ... | 2326 //NO MMX just normal asm ... |
2325 asm volatile( | 2327 asm volatile( |
2326 "xor %%"REG_a", %%"REG_a" \n\t" // i | 2328 "xor %%"REG_a", %%"REG_a" \n\t" // i |
2327 "xor %%"REG_b", %%"REG_b" \n\t" // xx | 2329 "xor %%"REG_b", %%"REG_b" \n\t" // xx |
2328 "xorl %%ecx, %%ecx \n\t" // 2*xalpha | 2330 "xorl %%ecx, %%ecx \n\t" // 2*xalpha |
2329 ".balign 16 \n\t" | 2331 ASMALIGN16 |
2330 "1: \n\t" | 2332 "1: \n\t" |
2331 "movzbl (%0, %%"REG_b"), %%edi \n\t" //src[xx] | 2333 "movzbl (%0, %%"REG_b"), %%edi \n\t" //src[xx] |
2332 "movzbl 1(%0, %%"REG_b"), %%esi \n\t" //src[xx+1] | 2334 "movzbl 1(%0, %%"REG_b"), %%esi \n\t" //src[xx+1] |
2333 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | 2335 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] |
2334 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | 2336 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha |
2521 int xInc_mask = xInc & 0xffff; | 2523 int xInc_mask = xInc & 0xffff; |
2522 asm volatile( | 2524 asm volatile( |
2523 "xor %%"REG_a", %%"REG_a" \n\t" // i | 2525 "xor %%"REG_a", %%"REG_a" \n\t" // i |
2524 "xor %%"REG_b", %%"REG_b" \n\t" // xx | 2526 "xor %%"REG_b", %%"REG_b" \n\t" // xx |
2525 "xorl %%ecx, %%ecx \n\t" // 2*xalpha | 2527 "xorl %%ecx, %%ecx \n\t" // 2*xalpha |
2526 ".balign 16 \n\t" | 2528 ASMALIGN16 |
2527 "1: \n\t" | 2529 "1: \n\t" |
2528 "mov %0, %%"REG_S" \n\t" | 2530 "mov %0, %%"REG_S" \n\t" |
2529 "movzbl (%%"REG_S", %%"REG_b"), %%edi \n\t" //src[xx] | 2531 "movzbl (%%"REG_S", %%"REG_b"), %%edi \n\t" //src[xx] |
2530 "movzbl 1(%%"REG_S", %%"REG_b"), %%esi \n\t" //src[xx+1] | 2532 "movzbl 1(%%"REG_S", %%"REG_b"), %%esi \n\t" //src[xx+1] |
2531 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | 2533 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] |