comparison libswscale/rgb2rgb_template.c @ 28968:e67354af521e

Revert the following commit — Date: Wed Mar 18 23:11:50 2009; New Revision: 28996; Log: "Fix libswscale compilation on non-x86, hopefully without breaking MinGW64 again." This change was non-optimal; the correct approach would have been to revert the offending commits if no time was available to find a clean fix.
author michael
date Thu, 19 Mar 2009 15:02:44 +0000
parents a7e795e068ad
children a03804d10dbf
comparison
equal deleted inserted replaced
28967:128847ca9780 28968:e67354af521e
1337 } 1337 }
1338 } 1338 }
1339 1339
1340 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) 1340 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
1341 { 1341 {
1342 #if HAVE_MMX 1342 x86_reg idx = 15 - src_size;
1343 x86_reg
1344 #else
1345 long
1346 #endif
1347 idx = 15 - src_size;
1348 const uint8_t *s = src-idx; 1343 const uint8_t *s = src-idx;
1349 uint8_t *d = dst-idx; 1344 uint8_t *d = dst-idx;
1350 #if HAVE_MMX 1345 #if HAVE_MMX
1351 __asm__ volatile( 1346 __asm__ volatile(
1352 "test %0, %0 \n\t" 1347 "test %0, %0 \n\t"
1479 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, 1474 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1480 long width, long height, 1475 long width, long height,
1481 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) 1476 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
1482 { 1477 {
1483 long y; 1478 long y;
1484 const long chromWidth= width>>1; 1479 const x86_reg chromWidth= width>>1;
1485 for (y=0; y<height; y++) 1480 for (y=0; y<height; y++)
1486 { 1481 {
1487 #if HAVE_MMX 1482 #if HAVE_MMX
1488 x86_reg cw = chromWidth;
1489 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) 1483 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1490 __asm__ volatile( 1484 __asm__ volatile(
1491 "xor %%"REG_a", %%"REG_a" \n\t" 1485 "xor %%"REG_a", %%"REG_a" \n\t"
1492 ASMALIGN(4) 1486 ASMALIGN(4)
1493 "1: \n\t" 1487 "1: \n\t"
1515 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" 1509 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
1516 1510
1517 "add $8, %%"REG_a" \n\t" 1511 "add $8, %%"REG_a" \n\t"
1518 "cmp %4, %%"REG_a" \n\t" 1512 "cmp %4, %%"REG_a" \n\t"
1519 " jb 1b \n\t" 1513 " jb 1b \n\t"
1520 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (cw) 1514 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
1521 : "%"REG_a 1515 : "%"REG_a
1522 ); 1516 );
1523 #else 1517 #else
1524 1518
1525 #if ARCH_ALPHA && HAVE_MVI 1519 #if ARCH_ALPHA && HAVE_MVI
1632 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, 1626 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1633 long width, long height, 1627 long width, long height,
1634 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) 1628 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
1635 { 1629 {
1636 long y; 1630 long y;
1637 const long chromWidth= width>>1; 1631 const x86_reg chromWidth= width>>1;
1638 for (y=0; y<height; y++) 1632 for (y=0; y<height; y++)
1639 { 1633 {
1640 #if HAVE_MMX 1634 #if HAVE_MMX
1641 x86_reg cw = chromWidth;
1642 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) 1635 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1643 __asm__ volatile( 1636 __asm__ volatile(
1644 "xor %%"REG_a", %%"REG_a" \n\t" 1637 "xor %%"REG_a", %%"REG_a" \n\t"
1645 ASMALIGN(4) 1638 ASMALIGN(4)
1646 "1: \n\t" 1639 "1: \n\t"
1668 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" 1661 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
1669 1662
1670 "add $8, %%"REG_a" \n\t" 1663 "add $8, %%"REG_a" \n\t"
1671 "cmp %4, %%"REG_a" \n\t" 1664 "cmp %4, %%"REG_a" \n\t"
1672 " jb 1b \n\t" 1665 " jb 1b \n\t"
1673 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (cw) 1666 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
1674 : "%"REG_a 1667 : "%"REG_a
1675 ); 1668 );
1676 #else 1669 #else
1677 //FIXME adapt the Alpha ASM code from yv12->yuy2 1670 //FIXME adapt the Alpha ASM code from yv12->yuy2
1678 1671
1763 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 1756 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1764 long width, long height, 1757 long width, long height,
1765 long lumStride, long chromStride, long srcStride) 1758 long lumStride, long chromStride, long srcStride)
1766 { 1759 {
1767 long y; 1760 long y;
1768 const long chromWidth= width>>1; 1761 const x86_reg chromWidth= width>>1;
1769 for (y=0; y<height; y+=2) 1762 for (y=0; y<height; y+=2)
1770 { 1763 {
1771 #if HAVE_MMX 1764 #if HAVE_MMX
1772 x86_reg cw = chromWidth;
1773 __asm__ volatile( 1765 __asm__ volatile(
1774 "xor %%"REG_a", %%"REG_a" \n\t" 1766 "xor %%"REG_a", %%"REG_a" \n\t"
1775 "pcmpeqw %%mm7, %%mm7 \n\t" 1767 "pcmpeqw %%mm7, %%mm7 \n\t"
1776 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... 1768 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
1777 ASMALIGN(4) 1769 ASMALIGN(4)
1816 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" 1808 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
1817 1809
1818 "add $8, %%"REG_a" \n\t" 1810 "add $8, %%"REG_a" \n\t"
1819 "cmp %4, %%"REG_a" \n\t" 1811 "cmp %4, %%"REG_a" \n\t"
1820 " jb 1b \n\t" 1812 " jb 1b \n\t"
1821 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (cw) 1813 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
1822 : "memory", "%"REG_a 1814 : "memory", "%"REG_a
1823 ); 1815 );
1824 1816
1825 ydst += lumStride; 1817 ydst += lumStride;
1826 src += srcStride; 1818 src += srcStride;
1846 1838
1847 "add $8, %%"REG_a" \n\t" 1839 "add $8, %%"REG_a" \n\t"
1848 "cmp %4, %%"REG_a" \n\t" 1840 "cmp %4, %%"REG_a" \n\t"
1849 " jb 1b \n\t" 1841 " jb 1b \n\t"
1850 1842
1851 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (cw) 1843 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
1852 : "memory", "%"REG_a 1844 : "memory", "%"REG_a
1853 ); 1845 );
1854 #else 1846 #else
1855 long i; 1847 long i;
1856 for (i=0; i<chromWidth; i++) 1848 for (i=0; i<chromWidth; i++)
1950 "g" (-mmxSize) 1942 "g" (-mmxSize)
1951 : "%"REG_a 1943 : "%"REG_a
1952 1944
1953 ); 1945 );
1954 #else 1946 #else
1955 const long mmxSize=1; 1947 const x86_reg mmxSize=1;
1956 #endif 1948 #endif
1957 dst[0 ]= (3*src[0] + src[srcStride])>>2; 1949 dst[0 ]= (3*src[0] + src[srcStride])>>2;
1958 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; 1950 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
1959 1951
1960 for (x=mmxSize-1; x<srcWidth-1; x++){ 1952 for (x=mmxSize-1; x<srcWidth-1; x++){
2002 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 1994 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
2003 long width, long height, 1995 long width, long height,
2004 long lumStride, long chromStride, long srcStride) 1996 long lumStride, long chromStride, long srcStride)
2005 { 1997 {
2006 long y; 1998 long y;
2007 const long chromWidth= width>>1; 1999 const x86_reg chromWidth= width>>1;
2008 for (y=0; y<height; y+=2) 2000 for (y=0; y<height; y+=2)
2009 { 2001 {
2010 #if HAVE_MMX 2002 #if HAVE_MMX
2011 x86_reg cw = chromWidth;
2012 __asm__ volatile( 2003 __asm__ volatile(
2013 "xor %%"REG_a", %%"REG_a" \n\t" 2004 "xor %%"REG_a", %%"REG_a" \n\t"
2014 "pcmpeqw %%mm7, %%mm7 \n\t" 2005 "pcmpeqw %%mm7, %%mm7 \n\t"
2015 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... 2006 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
2016 ASMALIGN(4) 2007 ASMALIGN(4)
2055 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" 2046 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
2056 2047
2057 "add $8, %%"REG_a" \n\t" 2048 "add $8, %%"REG_a" \n\t"
2058 "cmp %4, %%"REG_a" \n\t" 2049 "cmp %4, %%"REG_a" \n\t"
2059 " jb 1b \n\t" 2050 " jb 1b \n\t"
2060 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (cw) 2051 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
2061 : "memory", "%"REG_a 2052 : "memory", "%"REG_a
2062 ); 2053 );
2063 2054
2064 ydst += lumStride; 2055 ydst += lumStride;
2065 src += srcStride; 2056 src += srcStride;
2085 2076
2086 "add $8, %%"REG_a" \n\t" 2077 "add $8, %%"REG_a" \n\t"
2087 "cmp %4, %%"REG_a" \n\t" 2078 "cmp %4, %%"REG_a" \n\t"
2088 " jb 1b \n\t" 2079 " jb 1b \n\t"
2089 2080
2090 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (cw) 2081 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
2091 : "memory", "%"REG_a 2082 : "memory", "%"REG_a
2092 ); 2083 );
2093 #else 2084 #else
2094 long i; 2085 long i;
2095 for (i=0; i<chromWidth; i++) 2086 for (i=0; i<chromWidth; i++)
2130 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 2121 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
2131 long width, long height, 2122 long width, long height,
2132 long lumStride, long chromStride, long srcStride) 2123 long lumStride, long chromStride, long srcStride)
2133 { 2124 {
2134 long y; 2125 long y;
2135 const long chromWidth= width>>1; 2126 const x86_reg chromWidth= width>>1;
2136 #if HAVE_MMX 2127 #if HAVE_MMX
2137 x86_reg cw = chromWidth;
2138 for (y=0; y<height-2; y+=2) 2128 for (y=0; y<height-2; y+=2)
2139 { 2129 {
2140 long i; 2130 long i;
2141 for (i=0; i<2; i++) 2131 for (i=0; i<2; i++)
2142 { 2132 {
2356 "movd %%mm0, (%2, %%"REG_a") \n\t" 2346 "movd %%mm0, (%2, %%"REG_a") \n\t"
2357 "punpckhdq %%mm0, %%mm0 \n\t" 2347 "punpckhdq %%mm0, %%mm0 \n\t"
2358 "movd %%mm0, (%3, %%"REG_a") \n\t" 2348 "movd %%mm0, (%3, %%"REG_a") \n\t"
2359 "add $4, %%"REG_a" \n\t" 2349 "add $4, %%"REG_a" \n\t"
2360 " js 1b \n\t" 2350 " js 1b \n\t"
2361 : : "r" (src+cw*6), "r" (src+srcStride+cw*6), "r" (udst+cw), "r" (vdst+cw), "g" (-cw) 2351 : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
2362 : "%"REG_a, "%"REG_d 2352 : "%"REG_a, "%"REG_d
2363 ); 2353 );
2364 2354
2365 udst += chromStride; 2355 udst += chromStride;
2366 vdst += chromStride; 2356 vdst += chromStride;
2509 uint8_t *dst1, uint8_t *dst2, 2499 uint8_t *dst1, uint8_t *dst2,
2510 long width, long height, 2500 long width, long height,
2511 long srcStride1, long srcStride2, 2501 long srcStride1, long srcStride2,
2512 long dstStride1, long dstStride2) 2502 long dstStride1, long dstStride2)
2513 { 2503 {
2514 long x,y,w,h; 2504 x86_reg y;
2505 long x,w,h;
2515 w=width/2; h=height/2; 2506 w=width/2; h=height/2;
2516 #if HAVE_MMX 2507 #if HAVE_MMX
2517 __asm__ volatile( 2508 __asm__ volatile(
2518 PREFETCH" %0 \n\t" 2509 PREFETCH" %0 \n\t"
2519 PREFETCH" %1 \n\t" 2510 PREFETCH" %1 \n\t"
2612 uint8_t *dst, 2603 uint8_t *dst,
2613 long width, long height, 2604 long width, long height,
2614 long srcStride1, long srcStride2, 2605 long srcStride1, long srcStride2,
2615 long srcStride3, long dstStride) 2606 long srcStride3, long dstStride)
2616 { 2607 {
2608 x86_reg x;
2617 long y,w,h; 2609 long y,w,h;
2618 w=width/2; h=height; 2610 w=width/2; h=height;
2619 for (y=0;y<h;y++){ 2611 for (y=0;y<h;y++){
2620 const uint8_t* yp=src1+srcStride1*y; 2612 const uint8_t* yp=src1+srcStride1*y;
2621 const uint8_t* up=src2+srcStride2*(y>>2); 2613 const uint8_t* up=src2+srcStride2*(y>>2);
2622 const uint8_t* vp=src3+srcStride3*(y>>2); 2614 const uint8_t* vp=src3+srcStride3*(y>>2);
2623 uint8_t* d=dst+dstStride*y; 2615 uint8_t* d=dst+dstStride*y;
2624 #if HAVE_MMX 2616 x=0;
2625 x86_reg x = 0; 2617 #if HAVE_MMX
2626 for (;x<w-7;x+=8) 2618 for (;x<w-7;x+=8)
2627 { 2619 {
2628 __asm__ volatile( 2620 __asm__ volatile(
2629 PREFETCH" 32(%1, %0) \n\t" 2621 PREFETCH" 32(%1, %0) \n\t"
2630 PREFETCH" 32(%2, %0) \n\t" 2622 PREFETCH" 32(%2, %0) \n\t"
2674 2666
2675 : "+r" (x) 2667 : "+r" (x)
2676 : "r"(yp), "r" (up), "r"(vp), "r"(d) 2668 : "r"(yp), "r" (up), "r"(vp), "r"(d)
2677 :"memory"); 2669 :"memory");
2678 } 2670 }
2679 #else
2680 long x = 0;
2681 #endif 2671 #endif
2682 for (; x<w; x++) 2672 for (; x<w; x++)
2683 { 2673 {
2684 const long x2 = x<<2; 2674 const long x2 = x<<2;
2685 d[8*x+0] = yp[x2]; 2675 d[8*x+0] = yp[x2];