comparison libswscale/rgb2rgb_template.c @ 28957:e538af2c69c1

Do not assume that long is the same width as an x86 register; use x86_reg for values passed as inline-asm operands.
author ramiro
date Wed, 18 Mar 2009 17:07:30 +0000
parents 8a0785c19f48
children 992cca4d7dd3
comparison
equal deleted inserted replaced
28956:6d0da4fd4544 28957:e538af2c69c1
1337 } 1337 }
1338 } 1338 }
1339 1339
1340 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) 1340 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
1341 { 1341 {
1342 long idx = 15 - src_size; 1342 x86_reg idx = 15 - src_size;
1343 const uint8_t *s = src-idx; 1343 const uint8_t *s = src-idx;
1344 uint8_t *d = dst-idx; 1344 uint8_t *d = dst-idx;
1345 #if HAVE_MMX 1345 #if HAVE_MMX
1346 __asm__ volatile( 1346 __asm__ volatile(
1347 "test %0, %0 \n\t" 1347 "test %0, %0 \n\t"
1403 1403
1404 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) 1404 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
1405 { 1405 {
1406 unsigned i; 1406 unsigned i;
1407 #if HAVE_MMX 1407 #if HAVE_MMX
1408 long mmx_size= 23 - src_size; 1408 x86_reg mmx_size= 23 - src_size;
1409 __asm__ volatile ( 1409 __asm__ volatile (
1410 "test %%"REG_a", %%"REG_a" \n\t" 1410 "test %%"REG_a", %%"REG_a" \n\t"
1411 "jns 2f \n\t" 1411 "jns 2f \n\t"
1412 "movq "MANGLE(mask24r)", %%mm5 \n\t" 1412 "movq "MANGLE(mask24r)", %%mm5 \n\t"
1413 "movq "MANGLE(mask24g)", %%mm6 \n\t" 1413 "movq "MANGLE(mask24g)", %%mm6 \n\t"
1474 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, 1474 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1475 long width, long height, 1475 long width, long height,
1476 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) 1476 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
1477 { 1477 {
1478 long y; 1478 long y;
1479 const long chromWidth= width>>1; 1479 const x86_reg chromWidth= width>>1;
1480 for (y=0; y<height; y++) 1480 for (y=0; y<height; y++)
1481 { 1481 {
1482 #if HAVE_MMX 1482 #if HAVE_MMX
1483 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) 1483 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1484 __asm__ volatile( 1484 __asm__ volatile(
1626 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, 1626 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1627 long width, long height, 1627 long width, long height,
1628 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) 1628 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
1629 { 1629 {
1630 long y; 1630 long y;
1631 const long chromWidth= width>>1; 1631 const x86_reg chromWidth= width>>1;
1632 for (y=0; y<height; y++) 1632 for (y=0; y<height; y++)
1633 { 1633 {
1634 #if HAVE_MMX 1634 #if HAVE_MMX
1635 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) 1635 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1636 __asm__ volatile( 1636 __asm__ volatile(
1756 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 1756 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1757 long width, long height, 1757 long width, long height,
1758 long lumStride, long chromStride, long srcStride) 1758 long lumStride, long chromStride, long srcStride)
1759 { 1759 {
1760 long y; 1760 long y;
1761 const long chromWidth= width>>1; 1761 const x86_reg chromWidth= width>>1;
1762 for (y=0; y<height; y+=2) 1762 for (y=0; y<height; y+=2)
1763 { 1763 {
1764 #if HAVE_MMX 1764 #if HAVE_MMX
1765 __asm__ volatile( 1765 __asm__ volatile(
1766 "xor %%"REG_a", %%"REG_a" \n\t" 1766 "xor %%"REG_a", %%"REG_a" \n\t"
1898 1898
1899 dst+= dstStride; 1899 dst+= dstStride;
1900 1900
1901 for (y=1; y<srcHeight; y++){ 1901 for (y=1; y<srcHeight; y++){
1902 #if HAVE_MMX2 || HAVE_AMD3DNOW 1902 #if HAVE_MMX2 || HAVE_AMD3DNOW
1903 const long mmxSize= srcWidth&~15; 1903 const x86_reg mmxSize= srcWidth&~15;
1904 __asm__ volatile( 1904 __asm__ volatile(
1905 "mov %4, %%"REG_a" \n\t" 1905 "mov %4, %%"REG_a" \n\t"
1906 "1: \n\t" 1906 "1: \n\t"
1907 "movq (%0, %%"REG_a"), %%mm0 \n\t" 1907 "movq (%0, %%"REG_a"), %%mm0 \n\t"
1908 "movq (%1, %%"REG_a"), %%mm1 \n\t" 1908 "movq (%1, %%"REG_a"), %%mm1 \n\t"
1942 "g" (-mmxSize) 1942 "g" (-mmxSize)
1943 : "%"REG_a 1943 : "%"REG_a
1944 1944
1945 ); 1945 );
1946 #else 1946 #else
1947 const long mmxSize=1; 1947 const x86_reg mmxSize=1;
1948 #endif 1948 #endif
1949 dst[0 ]= (3*src[0] + src[srcStride])>>2; 1949 dst[0 ]= (3*src[0] + src[srcStride])>>2;
1950 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; 1950 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
1951 1951
1952 for (x=mmxSize-1; x<srcWidth-1; x++){ 1952 for (x=mmxSize-1; x<srcWidth-1; x++){
1994 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 1994 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1995 long width, long height, 1995 long width, long height,
1996 long lumStride, long chromStride, long srcStride) 1996 long lumStride, long chromStride, long srcStride)
1997 { 1997 {
1998 long y; 1998 long y;
1999 const long chromWidth= width>>1; 1999 const x86_reg chromWidth= width>>1;
2000 for (y=0; y<height; y+=2) 2000 for (y=0; y<height; y+=2)
2001 { 2001 {
2002 #if HAVE_MMX 2002 #if HAVE_MMX
2003 __asm__ volatile( 2003 __asm__ volatile(
2004 "xor %%"REG_a", %%"REG_a" \n\t" 2004 "xor %%"REG_a", %%"REG_a" \n\t"
2121 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 2121 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
2122 long width, long height, 2122 long width, long height,
2123 long lumStride, long chromStride, long srcStride) 2123 long lumStride, long chromStride, long srcStride)
2124 { 2124 {
2125 long y; 2125 long y;
2126 const long chromWidth= width>>1; 2126 const x86_reg chromWidth= width>>1;
2127 #if HAVE_MMX 2127 #if HAVE_MMX
2128 for (y=0; y<height-2; y+=2) 2128 for (y=0; y<height-2; y+=2)
2129 { 2129 {
2130 long i; 2130 long i;
2131 for (i=0; i<2; i++) 2131 for (i=0; i<2; i++)
2194 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" 2194 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t"
2195 2195
2196 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" 2196 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
2197 "add $8, %%"REG_a" \n\t" 2197 "add $8, %%"REG_a" \n\t"
2198 " js 1b \n\t" 2198 " js 1b \n\t"
2199 : : "r" (src+width*3), "r" (ydst+width), "g" (-width) 2199 : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
2200 : "%"REG_a, "%"REG_d 2200 : "%"REG_a, "%"REG_d
2201 ); 2201 );
2202 ydst += lumStride; 2202 ydst += lumStride;
2203 src += srcStride; 2203 src += srcStride;
2204 } 2204 }
2438 "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t" 2438 "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t"
2439 "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t" 2439 "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t"
2440 "add $16, %%"REG_a" \n\t" 2440 "add $16, %%"REG_a" \n\t"
2441 "cmp %3, %%"REG_a" \n\t" 2441 "cmp %3, %%"REG_a" \n\t"
2442 " jb 1b \n\t" 2442 " jb 1b \n\t"
2443 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) 2443 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
2444 : "memory", "%"REG_a"" 2444 : "memory", "%"REG_a""
2445 ); 2445 );
2446 #else 2446 #else
2447 __asm__( 2447 __asm__(
2448 "xor %%"REG_a", %%"REG_a" \n\t" 2448 "xor %%"REG_a", %%"REG_a" \n\t"
2464 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t" 2464 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t"
2465 MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t" 2465 MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t"
2466 "add $16, %%"REG_a" \n\t" 2466 "add $16, %%"REG_a" \n\t"
2467 "cmp %3, %%"REG_a" \n\t" 2467 "cmp %3, %%"REG_a" \n\t"
2468 " jb 1b \n\t" 2468 " jb 1b \n\t"
2469 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) 2469 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
2470 : "memory", "%"REG_a 2470 : "memory", "%"REG_a
2471 ); 2471 );
2472 #endif 2472 #endif
2473 for (w= (width&(~15)); w < width; w++) 2473 for (w= (width&(~15)); w < width; w++)
2474 { 2474 {
2499 uint8_t *dst1, uint8_t *dst2, 2499 uint8_t *dst1, uint8_t *dst2,
2500 long width, long height, 2500 long width, long height,
2501 long srcStride1, long srcStride2, 2501 long srcStride1, long srcStride2,
2502 long dstStride1, long dstStride2) 2502 long dstStride1, long dstStride2)
2503 { 2503 {
2504 long y,x,w,h; 2504 x86_reg y;
2505 long x,w,h;
2505 w=width/2; h=height/2; 2506 w=width/2; h=height/2;
2506 #if HAVE_MMX 2507 #if HAVE_MMX
2507 __asm__ volatile( 2508 __asm__ volatile(
2508 PREFETCH" %0 \n\t" 2509 PREFETCH" %0 \n\t"
2509 PREFETCH" %1 \n\t" 2510 PREFETCH" %1 \n\t"
2602 uint8_t *dst, 2603 uint8_t *dst,
2603 long width, long height, 2604 long width, long height,
2604 long srcStride1, long srcStride2, 2605 long srcStride1, long srcStride2,
2605 long srcStride3, long dstStride) 2606 long srcStride3, long dstStride)
2606 { 2607 {
2607 long y,x,w,h; 2608 x86_reg x;
2609 long y,w,h;
2608 w=width/2; h=height; 2610 w=width/2; h=height;
2609 for (y=0;y<h;y++){ 2611 for (y=0;y<h;y++){
2610 const uint8_t* yp=src1+srcStride1*y; 2612 const uint8_t* yp=src1+srcStride1*y;
2611 const uint8_t* up=src2+srcStride2*(y>>2); 2613 const uint8_t* up=src2+srcStride2*(y>>2);
2612 const uint8_t* vp=src3+srcStride3*(y>>2); 2614 const uint8_t* vp=src3+srcStride3*(y>>2);