comparison postproc/swscale_template.c @ 18104:7b408d60de9e

Add support for Intel Mac. mp3lib is not fixed yet.
author nplourde
date Sat, 15 Apr 2006 20:46:54 +0000
parents fbf94ea858f1
children b10d4b3cb9ec
comparison
equal deleted inserted replaced
18103:26ea12332e67 18104:7b408d60de9e
13 13
14 You should have received a copy of the GNU General Public License 14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software 15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18
19 #include "asmalign.h"
18 20
19 #undef REAL_MOVNTQ 21 #undef REAL_MOVNTQ
20 #undef MOVNTQ 22 #undef MOVNTQ
21 #undef PAVGB 23 #undef PAVGB
22 #undef PREFETCH 24 #undef PREFETCH
69 "xor %%"REG_a", %%"REG_a" \n\t"\ 71 "xor %%"REG_a", %%"REG_a" \n\t"\
70 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ 72 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
71 "movq %%mm3, %%mm4 \n\t"\ 73 "movq %%mm3, %%mm4 \n\t"\
72 "lea " offset "(%0), %%"REG_d" \n\t"\ 74 "lea " offset "(%0), %%"REG_d" \n\t"\
73 "mov (%%"REG_d"), %%"REG_S" \n\t"\ 75 "mov (%%"REG_d"), %%"REG_S" \n\t"\
74 ".balign 16 \n\t" /* FIXME Unroll? */\ 76 ASMALIGN16 /* FIXME Unroll? */\
75 "1: \n\t"\ 77 "1: \n\t"\
76 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ 78 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
77 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\ 79 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
78 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\ 80 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\
79 "add $16, %%"REG_d" \n\t"\ 81 "add $16, %%"REG_d" \n\t"\
96 "mov (%%"REG_d"), %%"REG_S" \n\t"\ 98 "mov (%%"REG_d"), %%"REG_S" \n\t"\
97 "jb 1b \n\t" 99 "jb 1b \n\t"
98 100
99 #define YSCALEYUV2YV121 \ 101 #define YSCALEYUV2YV121 \
100 "mov %2, %%"REG_a" \n\t"\ 102 "mov %2, %%"REG_a" \n\t"\
101 ".balign 16 \n\t" /* FIXME Unroll? */\ 103 ASMALIGN16 /* FIXME Unroll? */\
102 "1: \n\t"\ 104 "1: \n\t"\
103 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\ 105 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
104 "movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\ 106 "movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\
105 "psraw $7, %%mm0 \n\t"\ 107 "psraw $7, %%mm0 \n\t"\
106 "psraw $7, %%mm1 \n\t"\ 108 "psraw $7, %%mm1 \n\t"\
116 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) 118 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
117 : "%eax", "%ebx", "%ecx", "%edx", "%esi" 119 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
118 */ 120 */
119 #define YSCALEYUV2PACKEDX \ 121 #define YSCALEYUV2PACKEDX \
120 "xor %%"REG_a", %%"REG_a" \n\t"\ 122 "xor %%"REG_a", %%"REG_a" \n\t"\
121 ".balign 16 \n\t"\ 123 ASMALIGN16\
122 "nop \n\t"\ 124 "nop \n\t"\
123 "1: \n\t"\ 125 "1: \n\t"\
124 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\ 126 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
125 "mov (%%"REG_d"), %%"REG_S" \n\t"\ 127 "mov (%%"REG_d"), %%"REG_S" \n\t"\
126 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ 128 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
127 "movq %%mm3, %%mm4 \n\t"\ 129 "movq %%mm3, %%mm4 \n\t"\
128 ".balign 16 \n\t"\ 130 ASMALIGN16\
129 "2: \n\t"\ 131 "2: \n\t"\
130 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ 132 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
131 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\ 133 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
132 "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\ 134 "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
133 "add $16, %%"REG_d" \n\t"\ 135 "add $16, %%"REG_d" \n\t"\
141 \ 143 \
142 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\ 144 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
143 "mov (%%"REG_d"), %%"REG_S" \n\t"\ 145 "mov (%%"REG_d"), %%"REG_S" \n\t"\
144 "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\ 146 "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
145 "movq %%mm1, %%mm7 \n\t"\ 147 "movq %%mm1, %%mm7 \n\t"\
146 ".balign 16 \n\t"\ 148 ASMALIGN16\
147 "2: \n\t"\ 149 "2: \n\t"\
148 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ 150 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
149 "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\ 151 "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\
150 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\ 152 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\
151 "add $16, %%"REG_d" \n\t"\ 153 "add $16, %%"REG_d" \n\t"\
203 "punpcklwd %%mm6, %%mm6 \n\t"\ 205 "punpcklwd %%mm6, %%mm6 \n\t"\
204 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ 206 "movd %7, %%mm5 \n\t" /*uvalpha1*/\
205 "punpcklwd %%mm5, %%mm5 \n\t"\ 207 "punpcklwd %%mm5, %%mm5 \n\t"\
206 "punpcklwd %%mm5, %%mm5 \n\t"\ 208 "punpcklwd %%mm5, %%mm5 \n\t"\
207 "xor %%"REG_a", %%"REG_a" \n\t"\ 209 "xor %%"REG_a", %%"REG_a" \n\t"\
208 ".balign 16 \n\t"\ 210 ASMALIGN16\
209 "1: \n\t"\ 211 "1: \n\t"\
210 "movq (%0, %%"REG_a", 2), %%mm0 \n\t" /*buf0[eax]*/\ 212 "movq (%0, %%"REG_a", 2), %%mm0 \n\t" /*buf0[eax]*/\
211 "movq (%1, %%"REG_a", 2), %%mm1 \n\t" /*buf1[eax]*/\ 213 "movq (%1, %%"REG_a", 2), %%mm1 \n\t" /*buf1[eax]*/\
212 "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\ 214 "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\
213 "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\ 215 "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\
256 "psraw $3, %%mm0 \n\t"\ 258 "psraw $3, %%mm0 \n\t"\
257 "psraw $3, %%mm1 \n\t"\ 259 "psraw $3, %%mm1 \n\t"\
258 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\ 260 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\
259 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\ 261 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\
260 "xor "#index", "#index" \n\t"\ 262 "xor "#index", "#index" \n\t"\
261 ".balign 16 \n\t"\ 263 ASMALIGN16\
262 "1: \n\t"\ 264 "1: \n\t"\
263 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ 265 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
264 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ 266 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
265 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ 267 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
266 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ 268 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
288 290
289 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) 291 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
290 292
291 #define REAL_YSCALEYUV2RGB(index, c) \ 293 #define REAL_YSCALEYUV2RGB(index, c) \
292 "xor "#index", "#index" \n\t"\ 294 "xor "#index", "#index" \n\t"\
293 ".balign 16 \n\t"\ 295 ASMALIGN16\
294 "1: \n\t"\ 296 "1: \n\t"\
295 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ 297 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
296 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ 298 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
297 "movq 4096(%2, "#index"), %%mm5\n\t" /* uvbuf0[eax+2048]*/\ 299 "movq 4096(%2, "#index"), %%mm5\n\t" /* uvbuf0[eax+2048]*/\
298 "movq 4096(%3, "#index"), %%mm4\n\t" /* uvbuf1[eax+2048]*/\ 300 "movq 4096(%3, "#index"), %%mm4\n\t" /* uvbuf1[eax+2048]*/\
354 "pxor %%mm7, %%mm7 \n\t" 356 "pxor %%mm7, %%mm7 \n\t"
355 #define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c) 357 #define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c)
356 358
357 #define REAL_YSCALEYUV2PACKED1(index, c) \ 359 #define REAL_YSCALEYUV2PACKED1(index, c) \
358 "xor "#index", "#index" \n\t"\ 360 "xor "#index", "#index" \n\t"\
359 ".balign 16 \n\t"\ 361 ASMALIGN16\
360 "1: \n\t"\ 362 "1: \n\t"\
361 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ 363 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
362 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ 364 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
363 "psraw $7, %%mm3 \n\t" \ 365 "psraw $7, %%mm3 \n\t" \
364 "psraw $7, %%mm4 \n\t" \ 366 "psraw $7, %%mm4 \n\t" \
369 371
370 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c) 372 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
371 373
372 #define REAL_YSCALEYUV2RGB1(index, c) \ 374 #define REAL_YSCALEYUV2RGB1(index, c) \
373 "xor "#index", "#index" \n\t"\ 375 "xor "#index", "#index" \n\t"\
374 ".balign 16 \n\t"\ 376 ASMALIGN16\
375 "1: \n\t"\ 377 "1: \n\t"\
376 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ 378 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
377 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ 379 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
378 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ 380 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
379 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ 381 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
418 "pxor %%mm7, %%mm7 \n\t" 420 "pxor %%mm7, %%mm7 \n\t"
419 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c) 421 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
420 422
421 #define REAL_YSCALEYUV2PACKED1b(index, c) \ 423 #define REAL_YSCALEYUV2PACKED1b(index, c) \
422 "xor "#index", "#index" \n\t"\ 424 "xor "#index", "#index" \n\t"\
423 ".balign 16 \n\t"\ 425 ASMALIGN16\
424 "1: \n\t"\ 426 "1: \n\t"\
425 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ 427 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
426 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ 428 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
427 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ 429 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
428 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ 430 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
437 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) 439 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
438 440
439 // do vertical chrominance interpolation 441 // do vertical chrominance interpolation
440 #define REAL_YSCALEYUV2RGB1b(index, c) \ 442 #define REAL_YSCALEYUV2RGB1b(index, c) \
441 "xor "#index", "#index" \n\t"\ 443 "xor "#index", "#index" \n\t"\
442 ".balign 16 \n\t"\ 444 ASMALIGN16\
443 "1: \n\t"\ 445 "1: \n\t"\
444 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ 446 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
445 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ 447 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
446 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ 448 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
447 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ 449 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
1660 "mov %2, %%"REG_a" \n\t" 1662 "mov %2, %%"REG_a" \n\t"
1661 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" 1663 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t"
1662 "movq "MANGLE(w1111)", %%mm5 \n\t" 1664 "movq "MANGLE(w1111)", %%mm5 \n\t"
1663 "pxor %%mm7, %%mm7 \n\t" 1665 "pxor %%mm7, %%mm7 \n\t"
1664 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" 1666 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t"
1665 ".balign 16 \n\t" 1667 ASMALIGN16
1666 "1: \n\t" 1668 "1: \n\t"
1667 PREFETCH" 64(%0, %%"REG_b") \n\t" 1669 PREFETCH" 64(%0, %%"REG_b") \n\t"
1668 "movd (%0, %%"REG_b"), %%mm0 \n\t" 1670 "movd (%0, %%"REG_b"), %%mm0 \n\t"
1669 "movd 3(%0, %%"REG_b"), %%mm1 \n\t" 1671 "movd 3(%0, %%"REG_b"), %%mm1 \n\t"
1670 "punpcklbw %%mm7, %%mm0 \n\t" 1672 "punpcklbw %%mm7, %%mm0 \n\t"
1746 "movq "MANGLE(w1111)", %%mm5 \n\t" 1748 "movq "MANGLE(w1111)", %%mm5 \n\t"
1747 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" 1749 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t"
1748 "pxor %%mm7, %%mm7 \n\t" 1750 "pxor %%mm7, %%mm7 \n\t"
1749 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b" \n\t" 1751 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b" \n\t"
1750 "add %%"REG_b", %%"REG_b" \n\t" 1752 "add %%"REG_b", %%"REG_b" \n\t"
1751 ".balign 16 \n\t" 1753 ASMALIGN16
1752 "1: \n\t" 1754 "1: \n\t"
1753 PREFETCH" 64(%0, %%"REG_b") \n\t" 1755 PREFETCH" 64(%0, %%"REG_b") \n\t"
1754 PREFETCH" 64(%1, %%"REG_b") \n\t" 1756 PREFETCH" 64(%1, %%"REG_b") \n\t"
1755 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1757 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1756 "movq (%0, %%"REG_b"), %%mm0 \n\t" 1758 "movq (%0, %%"REG_b"), %%mm0 \n\t"
2053 asm volatile( 2055 asm volatile(
2054 "pxor %%mm7, %%mm7 \n\t" 2056 "pxor %%mm7, %%mm7 \n\t"
2055 "movq "MANGLE(w02)", %%mm6 \n\t" 2057 "movq "MANGLE(w02)", %%mm6 \n\t"
2056 "push %%"REG_BP" \n\t" // we use 7 regs here ... 2058 "push %%"REG_BP" \n\t" // we use 7 regs here ...
2057 "mov %%"REG_a", %%"REG_BP" \n\t" 2059 "mov %%"REG_a", %%"REG_BP" \n\t"
2058 ".balign 16 \n\t" 2060 ASMALIGN16
2059 "1: \n\t" 2061 "1: \n\t"
2060 "movzwl (%2, %%"REG_BP"), %%eax \n\t" 2062 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
2061 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t" 2063 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t"
2062 "movq (%1, %%"REG_BP", 4), %%mm1\n\t" 2064 "movq (%1, %%"REG_BP", 4), %%mm1\n\t"
2063 "movq 8(%1, %%"REG_BP", 4), %%mm3\n\t" 2065 "movq 8(%1, %%"REG_BP", 4), %%mm3\n\t"
2091 asm volatile( 2093 asm volatile(
2092 "pxor %%mm7, %%mm7 \n\t" 2094 "pxor %%mm7, %%mm7 \n\t"
2093 "movq "MANGLE(w02)", %%mm6 \n\t" 2095 "movq "MANGLE(w02)", %%mm6 \n\t"
2094 "push %%"REG_BP" \n\t" // we use 7 regs here ... 2096 "push %%"REG_BP" \n\t" // we use 7 regs here ...
2095 "mov %%"REG_a", %%"REG_BP" \n\t" 2097 "mov %%"REG_a", %%"REG_BP" \n\t"
2096 ".balign 16 \n\t" 2098 ASMALIGN16
2097 "1: \n\t" 2099 "1: \n\t"
2098 "movzwl (%2, %%"REG_BP"), %%eax \n\t" 2100 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
2099 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t" 2101 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t"
2100 "movq (%1, %%"REG_BP", 8), %%mm1\n\t" 2102 "movq (%1, %%"REG_BP", 8), %%mm1\n\t"
2101 "movq 16(%1, %%"REG_BP", 8), %%mm3\n\t" 2103 "movq 16(%1, %%"REG_BP", 8), %%mm3\n\t"
2140 filterPos-= counter/2; 2142 filterPos-= counter/2;
2141 dst-= counter/2; 2143 dst-= counter/2;
2142 asm volatile( 2144 asm volatile(
2143 "pxor %%mm7, %%mm7 \n\t" 2145 "pxor %%mm7, %%mm7 \n\t"
2144 "movq "MANGLE(w02)", %%mm6 \n\t" 2146 "movq "MANGLE(w02)", %%mm6 \n\t"
2145 ".balign 16 \n\t" 2147 ASMALIGN16
2146 "1: \n\t" 2148 "1: \n\t"
2147 "mov %2, %%"REG_c" \n\t" 2149 "mov %2, %%"REG_c" \n\t"
2148 "movzwl (%%"REG_c", %0), %%eax \n\t" 2150 "movzwl (%%"REG_c", %0), %%eax \n\t"
2149 "movzwl 2(%%"REG_c", %0), %%ebx \n\t" 2151 "movzwl 2(%%"REG_c", %0), %%ebx \n\t"
2150 "mov %5, %%"REG_c" \n\t" 2152 "mov %5, %%"REG_c" \n\t"
2324 //NO MMX just normal asm ... 2326 //NO MMX just normal asm ...
2325 asm volatile( 2327 asm volatile(
2326 "xor %%"REG_a", %%"REG_a" \n\t" // i 2328 "xor %%"REG_a", %%"REG_a" \n\t" // i
2327 "xor %%"REG_b", %%"REG_b" \n\t" // xx 2329 "xor %%"REG_b", %%"REG_b" \n\t" // xx
2328 "xorl %%ecx, %%ecx \n\t" // 2*xalpha 2330 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
2329 ".balign 16 \n\t" 2331 ASMALIGN16
2330 "1: \n\t" 2332 "1: \n\t"
2331 "movzbl (%0, %%"REG_b"), %%edi \n\t" //src[xx] 2333 "movzbl (%0, %%"REG_b"), %%edi \n\t" //src[xx]
2332 "movzbl 1(%0, %%"REG_b"), %%esi \n\t" //src[xx+1] 2334 "movzbl 1(%0, %%"REG_b"), %%esi \n\t" //src[xx+1]
2333 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] 2335 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
2334 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha 2336 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2521 int xInc_mask = xInc & 0xffff; 2523 int xInc_mask = xInc & 0xffff;
2522 asm volatile( 2524 asm volatile(
2523 "xor %%"REG_a", %%"REG_a" \n\t" // i 2525 "xor %%"REG_a", %%"REG_a" \n\t" // i
2524 "xor %%"REG_b", %%"REG_b" \n\t" // xx 2526 "xor %%"REG_b", %%"REG_b" \n\t" // xx
2525 "xorl %%ecx, %%ecx \n\t" // 2*xalpha 2527 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
2526 ".balign 16 \n\t" 2528 ASMALIGN16
2527 "1: \n\t" 2529 "1: \n\t"
2528 "mov %0, %%"REG_S" \n\t" 2530 "mov %0, %%"REG_S" \n\t"
2529 "movzbl (%%"REG_S", %%"REG_b"), %%edi \n\t" //src[xx] 2531 "movzbl (%%"REG_S", %%"REG_b"), %%edi \n\t" //src[xx]
2530 "movzbl 1(%%"REG_S", %%"REG_b"), %%esi \n\t" //src[xx+1] 2532 "movzbl 1(%%"REG_S", %%"REG_b"), %%esi \n\t" //src[xx+1]
2531 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] 2533 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]