comparison libswscale/rgb2rgb_template.c @ 19372:6334c14b38eb

Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
author diego
date Sun, 13 Aug 2006 00:21:14 +0000
parents 8579acff875e
children 8fe37c66d10a
comparison
equal deleted inserted replaced
19371:829b824a7d33 19372:6334c14b38eb
9 * lot of big-endian byteorder fixes by Alex Beregszaszi 9 * lot of big-endian byteorder fixes by Alex Beregszaszi
10 */ 10 */
11 11
12 #include <stddef.h> 12 #include <stddef.h>
13 #include <inttypes.h> /* for __WORDSIZE */ 13 #include <inttypes.h> /* for __WORDSIZE */
14
15 #include "asmalign.h"
16 14
17 #ifndef __WORDSIZE 15 #ifndef __WORDSIZE
18 // #warning You have misconfigured system and probably will lose performance! 16 // #warning You have misconfigured system and probably will lose performance!
19 #define __WORDSIZE MP_WORDSIZE 17 #define __WORDSIZE MP_WORDSIZE
20 #endif 18 #endif
341 #if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) 339 #if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster)
342 asm volatile( 340 asm volatile(
343 "movq %3, %%mm5 \n\t" 341 "movq %3, %%mm5 \n\t"
344 "movq %4, %%mm6 \n\t" 342 "movq %4, %%mm6 \n\t"
345 "movq %5, %%mm7 \n\t" 343 "movq %5, %%mm7 \n\t"
346 ASMALIGN16 344 ASMALIGN(4)
347 "1: \n\t" 345 "1: \n\t"
348 PREFETCH" 32(%1) \n\t" 346 PREFETCH" 32(%1) \n\t"
349 "movd (%1), %%mm0 \n\t" 347 "movd (%1), %%mm0 \n\t"
350 "movd 4(%1), %%mm3 \n\t" 348 "movd 4(%1), %%mm3 \n\t"
351 "punpckldq 8(%1), %%mm0 \n\t" 349 "punpckldq 8(%1), %%mm0 \n\t"
498 #if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) 496 #if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster)
499 asm volatile( 497 asm volatile(
500 "movq %3, %%mm5 \n\t" 498 "movq %3, %%mm5 \n\t"
501 "movq %4, %%mm6 \n\t" 499 "movq %4, %%mm6 \n\t"
502 "movq %5, %%mm7 \n\t" 500 "movq %5, %%mm7 \n\t"
503 ASMALIGN16 501 ASMALIGN(4)
504 "1: \n\t" 502 "1: \n\t"
505 PREFETCH" 32(%1) \n\t" 503 PREFETCH" 32(%1) \n\t"
506 "movd (%1), %%mm0 \n\t" 504 "movd (%1), %%mm0 \n\t"
507 "movd 4(%1), %%mm3 \n\t" 505 "movd 4(%1), %%mm3 \n\t"
508 "punpckldq 8(%1), %%mm0 \n\t" 506 "punpckldq 8(%1), %%mm0 \n\t"
1353 { 1351 {
1354 #ifdef HAVE_MMX 1352 #ifdef HAVE_MMX
1355 /* TODO: unroll this loop */ 1353 /* TODO: unroll this loop */
1356 asm volatile ( 1354 asm volatile (
1357 "xor %%"REG_a", %%"REG_a" \n\t" 1355 "xor %%"REG_a", %%"REG_a" \n\t"
1358 ASMALIGN16 1356 ASMALIGN(4)
1359 "1: \n\t" 1357 "1: \n\t"
1360 PREFETCH" 32(%0, %%"REG_a") \n\t" 1358 PREFETCH" 32(%0, %%"REG_a") \n\t"
1361 "movq (%0, %%"REG_a"), %%mm0 \n\t" 1359 "movq (%0, %%"REG_a"), %%mm0 \n\t"
1362 "movq %%mm0, %%mm1 \n\t" 1360 "movq %%mm0, %%mm1 \n\t"
1363 "movq %%mm0, %%mm2 \n\t" 1361 "movq %%mm0, %%mm2 \n\t"
1403 long mmx_size= 23 - src_size; 1401 long mmx_size= 23 - src_size;
1404 asm volatile ( 1402 asm volatile (
1405 "movq "MANGLE(mask24r)", %%mm5 \n\t" 1403 "movq "MANGLE(mask24r)", %%mm5 \n\t"
1406 "movq "MANGLE(mask24g)", %%mm6 \n\t" 1404 "movq "MANGLE(mask24g)", %%mm6 \n\t"
1407 "movq "MANGLE(mask24b)", %%mm7 \n\t" 1405 "movq "MANGLE(mask24b)", %%mm7 \n\t"
1408 ASMALIGN16 1406 ASMALIGN(4)
1409 "1: \n\t" 1407 "1: \n\t"
1410 PREFETCH" 32(%1, %%"REG_a") \n\t" 1408 PREFETCH" 32(%1, %%"REG_a") \n\t"
1411 "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG 1409 "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG
1412 "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG 1410 "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG
1413 "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B 1411 "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B
1473 { 1471 {
1474 #ifdef HAVE_MMX 1472 #ifdef HAVE_MMX
1475 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) 1473 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
1476 asm volatile( 1474 asm volatile(
1477 "xor %%"REG_a", %%"REG_a" \n\t" 1475 "xor %%"REG_a", %%"REG_a" \n\t"
1478 ASMALIGN16 1476 ASMALIGN(4)
1479 "1: \n\t" 1477 "1: \n\t"
1480 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" 1478 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
1481 PREFETCH" 32(%2, %%"REG_a") \n\t" 1479 PREFETCH" 32(%2, %%"REG_a") \n\t"
1482 PREFETCH" 32(%3, %%"REG_a") \n\t" 1480 PREFETCH" 32(%3, %%"REG_a") \n\t"
1483 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) 1481 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0)
1626 { 1624 {
1627 #ifdef HAVE_MMX 1625 #ifdef HAVE_MMX
1628 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) 1626 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
1629 asm volatile( 1627 asm volatile(
1630 "xor %%"REG_a", %%"REG_a" \n\t" 1628 "xor %%"REG_a", %%"REG_a" \n\t"
1631 ASMALIGN16 1629 ASMALIGN(4)
1632 "1: \n\t" 1630 "1: \n\t"
1633 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" 1631 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
1634 PREFETCH" 32(%2, %%"REG_a") \n\t" 1632 PREFETCH" 32(%2, %%"REG_a") \n\t"
1635 PREFETCH" 32(%3, %%"REG_a") \n\t" 1633 PREFETCH" 32(%3, %%"REG_a") \n\t"
1636 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) 1634 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0)
1750 #ifdef HAVE_MMX 1748 #ifdef HAVE_MMX
1751 asm volatile( 1749 asm volatile(
1752 "xor %%"REG_a", %%"REG_a" \n\t" 1750 "xor %%"REG_a", %%"REG_a" \n\t"
1753 "pcmpeqw %%mm7, %%mm7 \n\t" 1751 "pcmpeqw %%mm7, %%mm7 \n\t"
1754 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... 1752 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
1755 ASMALIGN16 1753 ASMALIGN(4)
1756 "1: \n\t" 1754 "1: \n\t"
1757 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" 1755 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
1758 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) 1756 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
1759 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4) 1757 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4)
1760 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) 1758 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
1803 ydst += lumStride; 1801 ydst += lumStride;
1804 src += srcStride; 1802 src += srcStride;
1805 1803
1806 asm volatile( 1804 asm volatile(
1807 "xor %%"REG_a", %%"REG_a" \n\t" 1805 "xor %%"REG_a", %%"REG_a" \n\t"
1808 ASMALIGN16 1806 ASMALIGN(4)
1809 "1: \n\t" 1807 "1: \n\t"
1810 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" 1808 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
1811 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) 1809 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
1812 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4) 1810 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4)
1813 "movq 16(%0, %%"REG_a", 4), %%mm2\n\t" // YUYV YUYV(8) 1811 "movq 16(%0, %%"REG_a", 4), %%mm2\n\t" // YUYV YUYV(8)
1988 #ifdef HAVE_MMX 1986 #ifdef HAVE_MMX
1989 asm volatile( 1987 asm volatile(
1990 "xorl %%eax, %%eax \n\t" 1988 "xorl %%eax, %%eax \n\t"
1991 "pcmpeqw %%mm7, %%mm7 \n\t" 1989 "pcmpeqw %%mm7, %%mm7 \n\t"
1992 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... 1990 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
1993 ASMALIGN16 1991 ASMALIGN(4)
1994 "1: \n\t" 1992 "1: \n\t"
1995 PREFETCH" 64(%0, %%eax, 4) \n\t" 1993 PREFETCH" 64(%0, %%eax, 4) \n\t"
1996 "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0) 1994 "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0)
1997 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4) 1995 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4)
1998 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) 1996 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
2041 ydst += lumStride; 2039 ydst += lumStride;
2042 src += srcStride; 2040 src += srcStride;
2043 2041
2044 asm volatile( 2042 asm volatile(
2045 "xorl %%eax, %%eax \n\t" 2043 "xorl %%eax, %%eax \n\t"
2046 ASMALIGN16 2044 ASMALIGN(4)
2047 "1: \n\t" 2045 "1: \n\t"
2048 PREFETCH" 64(%0, %%eax, 4) \n\t" 2046 PREFETCH" 64(%0, %%eax, 4) \n\t"
2049 "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0) 2047 "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0)
2050 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4) 2048 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4)
2051 "movq 16(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(8) 2049 "movq 16(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(8)
2119 "mov %2, %%"REG_a" \n\t" 2117 "mov %2, %%"REG_a" \n\t"
2120 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" 2118 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t"
2121 "movq "MANGLE(w1111)", %%mm5 \n\t" 2119 "movq "MANGLE(w1111)", %%mm5 \n\t"
2122 "pxor %%mm7, %%mm7 \n\t" 2120 "pxor %%mm7, %%mm7 \n\t"
2123 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" 2121 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t"
2124 ASMALIGN16 2122 ASMALIGN(4)
2125 "1: \n\t" 2123 "1: \n\t"
2126 PREFETCH" 64(%0, %%"REG_b") \n\t" 2124 PREFETCH" 64(%0, %%"REG_b") \n\t"
2127 "movd (%0, %%"REG_b"), %%mm0 \n\t" 2125 "movd (%0, %%"REG_b"), %%mm0 \n\t"
2128 "movd 3(%0, %%"REG_b"), %%mm1 \n\t" 2126 "movd 3(%0, %%"REG_b"), %%mm1 \n\t"
2129 "punpcklbw %%mm7, %%mm0 \n\t" 2127 "punpcklbw %%mm7, %%mm0 \n\t"
2193 "movq "MANGLE(w1111)", %%mm5 \n\t" 2191 "movq "MANGLE(w1111)", %%mm5 \n\t"
2194 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" 2192 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t"
2195 "pxor %%mm7, %%mm7 \n\t" 2193 "pxor %%mm7, %%mm7 \n\t"
2196 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" 2194 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t"
2197 "add %%"REG_b", %%"REG_b" \n\t" 2195 "add %%"REG_b", %%"REG_b" \n\t"
2198 ASMALIGN16 2196 ASMALIGN(4)
2199 "1: \n\t" 2197 "1: \n\t"
2200 PREFETCH" 64(%0, %%"REG_b") \n\t" 2198 PREFETCH" 64(%0, %%"REG_b") \n\t"
2201 PREFETCH" 64(%1, %%"REG_b") \n\t" 2199 PREFETCH" 64(%1, %%"REG_b") \n\t"
2202 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 2200 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
2203 "movq (%0, %%"REG_b"), %%mm0 \n\t" 2201 "movq (%0, %%"REG_b"), %%mm0 \n\t"