Mercurial > mplayer.hg
comparison libswscale/rgb2rgb_template.c @ 19372:6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
author | diego |
---|---|
date | Sun, 13 Aug 2006 00:21:14 +0000 |
parents | 8579acff875e |
children | 8fe37c66d10a |
comparison
equal
deleted
inserted
replaced
19371:829b824a7d33 | 19372:6334c14b38eb |
---|---|
9 * lot of big-endian byteorder fixes by Alex Beregszaszi | 9 * lot of big-endian byteorder fixes by Alex Beregszaszi |
10 */ | 10 */ |
11 | 11 |
12 #include <stddef.h> | 12 #include <stddef.h> |
13 #include <inttypes.h> /* for __WORDSIZE */ | 13 #include <inttypes.h> /* for __WORDSIZE */ |
14 | |
15 #include "asmalign.h" | |
16 | 14 |
17 #ifndef __WORDSIZE | 15 #ifndef __WORDSIZE |
18 // #warning You have misconfigured system and probably will lose performance! | 16 // #warning You have misconfigured system and probably will lose performance! |
19 #define __WORDSIZE MP_WORDSIZE | 17 #define __WORDSIZE MP_WORDSIZE |
20 #endif | 18 #endif |
341 #if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) | 339 #if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) |
342 asm volatile( | 340 asm volatile( |
343 "movq %3, %%mm5 \n\t" | 341 "movq %3, %%mm5 \n\t" |
344 "movq %4, %%mm6 \n\t" | 342 "movq %4, %%mm6 \n\t" |
345 "movq %5, %%mm7 \n\t" | 343 "movq %5, %%mm7 \n\t" |
346 ASMALIGN16 | 344 ASMALIGN(4) |
347 "1: \n\t" | 345 "1: \n\t" |
348 PREFETCH" 32(%1) \n\t" | 346 PREFETCH" 32(%1) \n\t" |
349 "movd (%1), %%mm0 \n\t" | 347 "movd (%1), %%mm0 \n\t" |
350 "movd 4(%1), %%mm3 \n\t" | 348 "movd 4(%1), %%mm3 \n\t" |
351 "punpckldq 8(%1), %%mm0 \n\t" | 349 "punpckldq 8(%1), %%mm0 \n\t" |
498 #if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) | 496 #if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) |
499 asm volatile( | 497 asm volatile( |
500 "movq %3, %%mm5 \n\t" | 498 "movq %3, %%mm5 \n\t" |
501 "movq %4, %%mm6 \n\t" | 499 "movq %4, %%mm6 \n\t" |
502 "movq %5, %%mm7 \n\t" | 500 "movq %5, %%mm7 \n\t" |
503 ASMALIGN16 | 501 ASMALIGN(4) |
504 "1: \n\t" | 502 "1: \n\t" |
505 PREFETCH" 32(%1) \n\t" | 503 PREFETCH" 32(%1) \n\t" |
506 "movd (%1), %%mm0 \n\t" | 504 "movd (%1), %%mm0 \n\t" |
507 "movd 4(%1), %%mm3 \n\t" | 505 "movd 4(%1), %%mm3 \n\t" |
508 "punpckldq 8(%1), %%mm0 \n\t" | 506 "punpckldq 8(%1), %%mm0 \n\t" |
1353 { | 1351 { |
1354 #ifdef HAVE_MMX | 1352 #ifdef HAVE_MMX |
1355 /* TODO: unroll this loop */ | 1353 /* TODO: unroll this loop */ |
1356 asm volatile ( | 1354 asm volatile ( |
1357 "xor %%"REG_a", %%"REG_a" \n\t" | 1355 "xor %%"REG_a", %%"REG_a" \n\t" |
1358 ASMALIGN16 | 1356 ASMALIGN(4) |
1359 "1: \n\t" | 1357 "1: \n\t" |
1360 PREFETCH" 32(%0, %%"REG_a") \n\t" | 1358 PREFETCH" 32(%0, %%"REG_a") \n\t" |
1361 "movq (%0, %%"REG_a"), %%mm0 \n\t" | 1359 "movq (%0, %%"REG_a"), %%mm0 \n\t" |
1362 "movq %%mm0, %%mm1 \n\t" | 1360 "movq %%mm0, %%mm1 \n\t" |
1363 "movq %%mm0, %%mm2 \n\t" | 1361 "movq %%mm0, %%mm2 \n\t" |
1403 long mmx_size= 23 - src_size; | 1401 long mmx_size= 23 - src_size; |
1404 asm volatile ( | 1402 asm volatile ( |
1405 "movq "MANGLE(mask24r)", %%mm5 \n\t" | 1403 "movq "MANGLE(mask24r)", %%mm5 \n\t" |
1406 "movq "MANGLE(mask24g)", %%mm6 \n\t" | 1404 "movq "MANGLE(mask24g)", %%mm6 \n\t" |
1407 "movq "MANGLE(mask24b)", %%mm7 \n\t" | 1405 "movq "MANGLE(mask24b)", %%mm7 \n\t" |
1408 ASMALIGN16 | 1406 ASMALIGN(4) |
1409 "1: \n\t" | 1407 "1: \n\t" |
1410 PREFETCH" 32(%1, %%"REG_a") \n\t" | 1408 PREFETCH" 32(%1, %%"REG_a") \n\t" |
1411 "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG | 1409 "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG |
1412 "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG | 1410 "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG |
1413 "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B | 1411 "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B |
1473 { | 1471 { |
1474 #ifdef HAVE_MMX | 1472 #ifdef HAVE_MMX |
1475 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) | 1473 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) |
1476 asm volatile( | 1474 asm volatile( |
1477 "xor %%"REG_a", %%"REG_a" \n\t" | 1475 "xor %%"REG_a", %%"REG_a" \n\t" |
1478 ASMALIGN16 | 1476 ASMALIGN(4) |
1479 "1: \n\t" | 1477 "1: \n\t" |
1480 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" | 1478 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" |
1481 PREFETCH" 32(%2, %%"REG_a") \n\t" | 1479 PREFETCH" 32(%2, %%"REG_a") \n\t" |
1482 PREFETCH" 32(%3, %%"REG_a") \n\t" | 1480 PREFETCH" 32(%3, %%"REG_a") \n\t" |
1483 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) | 1481 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) |
1626 { | 1624 { |
1627 #ifdef HAVE_MMX | 1625 #ifdef HAVE_MMX |
1628 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) | 1626 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) |
1629 asm volatile( | 1627 asm volatile( |
1630 "xor %%"REG_a", %%"REG_a" \n\t" | 1628 "xor %%"REG_a", %%"REG_a" \n\t" |
1631 ASMALIGN16 | 1629 ASMALIGN(4) |
1632 "1: \n\t" | 1630 "1: \n\t" |
1633 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" | 1631 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" |
1634 PREFETCH" 32(%2, %%"REG_a") \n\t" | 1632 PREFETCH" 32(%2, %%"REG_a") \n\t" |
1635 PREFETCH" 32(%3, %%"REG_a") \n\t" | 1633 PREFETCH" 32(%3, %%"REG_a") \n\t" |
1636 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) | 1634 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) |
1750 #ifdef HAVE_MMX | 1748 #ifdef HAVE_MMX |
1751 asm volatile( | 1749 asm volatile( |
1752 "xor %%"REG_a", %%"REG_a" \n\t" | 1750 "xor %%"REG_a", %%"REG_a" \n\t" |
1753 "pcmpeqw %%mm7, %%mm7 \n\t" | 1751 "pcmpeqw %%mm7, %%mm7 \n\t" |
1754 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | 1752 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... |
1755 ASMALIGN16 | 1753 ASMALIGN(4) |
1756 "1: \n\t" | 1754 "1: \n\t" |
1757 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | 1755 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" |
1758 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | 1756 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) |
1759 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4) | 1757 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4) |
1760 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) | 1758 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) |
1803 ydst += lumStride; | 1801 ydst += lumStride; |
1804 src += srcStride; | 1802 src += srcStride; |
1805 | 1803 |
1806 asm volatile( | 1804 asm volatile( |
1807 "xor %%"REG_a", %%"REG_a" \n\t" | 1805 "xor %%"REG_a", %%"REG_a" \n\t" |
1808 ASMALIGN16 | 1806 ASMALIGN(4) |
1809 "1: \n\t" | 1807 "1: \n\t" |
1810 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | 1808 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" |
1811 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | 1809 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) |
1812 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4) | 1810 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4) |
1813 "movq 16(%0, %%"REG_a", 4), %%mm2\n\t" // YUYV YUYV(8) | 1811 "movq 16(%0, %%"REG_a", 4), %%mm2\n\t" // YUYV YUYV(8) |
1988 #ifdef HAVE_MMX | 1986 #ifdef HAVE_MMX |
1989 asm volatile( | 1987 asm volatile( |
1990 "xorl %%eax, %%eax \n\t" | 1988 "xorl %%eax, %%eax \n\t" |
1991 "pcmpeqw %%mm7, %%mm7 \n\t" | 1989 "pcmpeqw %%mm7, %%mm7 \n\t" |
1992 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | 1990 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... |
1993 ASMALIGN16 | 1991 ASMALIGN(4) |
1994 "1: \n\t" | 1992 "1: \n\t" |
1995 PREFETCH" 64(%0, %%eax, 4) \n\t" | 1993 PREFETCH" 64(%0, %%eax, 4) \n\t" |
1996 "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0) | 1994 "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0) |
1997 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4) | 1995 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4) |
1998 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) | 1996 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) |
2041 ydst += lumStride; | 2039 ydst += lumStride; |
2042 src += srcStride; | 2040 src += srcStride; |
2043 | 2041 |
2044 asm volatile( | 2042 asm volatile( |
2045 "xorl %%eax, %%eax \n\t" | 2043 "xorl %%eax, %%eax \n\t" |
2046 ASMALIGN16 | 2044 ASMALIGN(4) |
2047 "1: \n\t" | 2045 "1: \n\t" |
2048 PREFETCH" 64(%0, %%eax, 4) \n\t" | 2046 PREFETCH" 64(%0, %%eax, 4) \n\t" |
2049 "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0) | 2047 "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0) |
2050 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4) | 2048 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4) |
2051 "movq 16(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(8) | 2049 "movq 16(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(8) |
2119 "mov %2, %%"REG_a" \n\t" | 2117 "mov %2, %%"REG_a" \n\t" |
2120 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" | 2118 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" |
2121 "movq "MANGLE(w1111)", %%mm5 \n\t" | 2119 "movq "MANGLE(w1111)", %%mm5 \n\t" |
2122 "pxor %%mm7, %%mm7 \n\t" | 2120 "pxor %%mm7, %%mm7 \n\t" |
2123 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" | 2121 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" |
2124 ASMALIGN16 | 2122 ASMALIGN(4) |
2125 "1: \n\t" | 2123 "1: \n\t" |
2126 PREFETCH" 64(%0, %%"REG_b") \n\t" | 2124 PREFETCH" 64(%0, %%"REG_b") \n\t" |
2127 "movd (%0, %%"REG_b"), %%mm0 \n\t" | 2125 "movd (%0, %%"REG_b"), %%mm0 \n\t" |
2128 "movd 3(%0, %%"REG_b"), %%mm1 \n\t" | 2126 "movd 3(%0, %%"REG_b"), %%mm1 \n\t" |
2129 "punpcklbw %%mm7, %%mm0 \n\t" | 2127 "punpcklbw %%mm7, %%mm0 \n\t" |
2193 "movq "MANGLE(w1111)", %%mm5 \n\t" | 2191 "movq "MANGLE(w1111)", %%mm5 \n\t" |
2194 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | 2192 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" |
2195 "pxor %%mm7, %%mm7 \n\t" | 2193 "pxor %%mm7, %%mm7 \n\t" |
2196 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" | 2194 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" |
2197 "add %%"REG_b", %%"REG_b" \n\t" | 2195 "add %%"REG_b", %%"REG_b" \n\t" |
2198 ASMALIGN16 | 2196 ASMALIGN(4) |
2199 "1: \n\t" | 2197 "1: \n\t" |
2200 PREFETCH" 64(%0, %%"REG_b") \n\t" | 2198 PREFETCH" 64(%0, %%"REG_b") \n\t" |
2201 PREFETCH" 64(%1, %%"REG_b") \n\t" | 2199 PREFETCH" 64(%1, %%"REG_b") \n\t" |
2202 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 2200 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
2203 "movq (%0, %%"REG_b"), %%mm0 \n\t" | 2201 "movq (%0, %%"REG_b"), %%mm0 \n\t" |