Mercurial > mplayer.hg
comparison libswscale/rgb2rgb_template.c @ 28957:e538af2c69c1
Do not assume long is same width as x86 register.
author | ramiro |
---|---|
date | Wed, 18 Mar 2009 17:07:30 +0000 |
parents | 8a0785c19f48 |
children | 992cca4d7dd3 |
comparison
equal
deleted
inserted
replaced
28956:6d0da4fd4544 | 28957:e538af2c69c1 |
---|---|
1337 } | 1337 } |
1338 } | 1338 } |
1339 | 1339 |
1340 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) | 1340 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) |
1341 { | 1341 { |
1342 long idx = 15 - src_size; | 1342 x86_reg idx = 15 - src_size; |
1343 const uint8_t *s = src-idx; | 1343 const uint8_t *s = src-idx; |
1344 uint8_t *d = dst-idx; | 1344 uint8_t *d = dst-idx; |
1345 #if HAVE_MMX | 1345 #if HAVE_MMX |
1346 __asm__ volatile( | 1346 __asm__ volatile( |
1347 "test %0, %0 \n\t" | 1347 "test %0, %0 \n\t" |
1403 | 1403 |
1404 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) | 1404 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) |
1405 { | 1405 { |
1406 unsigned i; | 1406 unsigned i; |
1407 #if HAVE_MMX | 1407 #if HAVE_MMX |
1408 long mmx_size= 23 - src_size; | 1408 x86_reg mmx_size= 23 - src_size; |
1409 __asm__ volatile ( | 1409 __asm__ volatile ( |
1410 "test %%"REG_a", %%"REG_a" \n\t" | 1410 "test %%"REG_a", %%"REG_a" \n\t" |
1411 "jns 2f \n\t" | 1411 "jns 2f \n\t" |
1412 "movq "MANGLE(mask24r)", %%mm5 \n\t" | 1412 "movq "MANGLE(mask24r)", %%mm5 \n\t" |
1413 "movq "MANGLE(mask24g)", %%mm6 \n\t" | 1413 "movq "MANGLE(mask24g)", %%mm6 \n\t" |
1474 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | 1474 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
1475 long width, long height, | 1475 long width, long height, |
1476 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) | 1476 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) |
1477 { | 1477 { |
1478 long y; | 1478 long y; |
1479 const long chromWidth= width>>1; | 1479 const x86_reg chromWidth= width>>1; |
1480 for (y=0; y<height; y++) | 1480 for (y=0; y<height; y++) |
1481 { | 1481 { |
1482 #if HAVE_MMX | 1482 #if HAVE_MMX |
1483 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) | 1483 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) |
1484 __asm__ volatile( | 1484 __asm__ volatile( |
1626 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | 1626 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
1627 long width, long height, | 1627 long width, long height, |
1628 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) | 1628 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) |
1629 { | 1629 { |
1630 long y; | 1630 long y; |
1631 const long chromWidth= width>>1; | 1631 const x86_reg chromWidth= width>>1; |
1632 for (y=0; y<height; y++) | 1632 for (y=0; y<height; y++) |
1633 { | 1633 { |
1634 #if HAVE_MMX | 1634 #if HAVE_MMX |
1635 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) | 1635 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) |
1636 __asm__ volatile( | 1636 __asm__ volatile( |
1756 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | 1756 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
1757 long width, long height, | 1757 long width, long height, |
1758 long lumStride, long chromStride, long srcStride) | 1758 long lumStride, long chromStride, long srcStride) |
1759 { | 1759 { |
1760 long y; | 1760 long y; |
1761 const long chromWidth= width>>1; | 1761 const x86_reg chromWidth= width>>1; |
1762 for (y=0; y<height; y+=2) | 1762 for (y=0; y<height; y+=2) |
1763 { | 1763 { |
1764 #if HAVE_MMX | 1764 #if HAVE_MMX |
1765 __asm__ volatile( | 1765 __asm__ volatile( |
1766 "xor %%"REG_a", %%"REG_a" \n\t" | 1766 "xor %%"REG_a", %%"REG_a" \n\t" |
1898 | 1898 |
1899 dst+= dstStride; | 1899 dst+= dstStride; |
1900 | 1900 |
1901 for (y=1; y<srcHeight; y++){ | 1901 for (y=1; y<srcHeight; y++){ |
1902 #if HAVE_MMX2 || HAVE_AMD3DNOW | 1902 #if HAVE_MMX2 || HAVE_AMD3DNOW |
1903 const long mmxSize= srcWidth&~15; | 1903 const x86_reg mmxSize= srcWidth&~15; |
1904 __asm__ volatile( | 1904 __asm__ volatile( |
1905 "mov %4, %%"REG_a" \n\t" | 1905 "mov %4, %%"REG_a" \n\t" |
1906 "1: \n\t" | 1906 "1: \n\t" |
1907 "movq (%0, %%"REG_a"), %%mm0 \n\t" | 1907 "movq (%0, %%"REG_a"), %%mm0 \n\t" |
1908 "movq (%1, %%"REG_a"), %%mm1 \n\t" | 1908 "movq (%1, %%"REG_a"), %%mm1 \n\t" |
1942 "g" (-mmxSize) | 1942 "g" (-mmxSize) |
1943 : "%"REG_a | 1943 : "%"REG_a |
1944 | 1944 |
1945 ); | 1945 ); |
1946 #else | 1946 #else |
1947 const long mmxSize=1; | 1947 const x86_reg mmxSize=1; |
1948 #endif | 1948 #endif |
1949 dst[0 ]= (3*src[0] + src[srcStride])>>2; | 1949 dst[0 ]= (3*src[0] + src[srcStride])>>2; |
1950 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; | 1950 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; |
1951 | 1951 |
1952 for (x=mmxSize-1; x<srcWidth-1; x++){ | 1952 for (x=mmxSize-1; x<srcWidth-1; x++){ |
1994 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | 1994 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
1995 long width, long height, | 1995 long width, long height, |
1996 long lumStride, long chromStride, long srcStride) | 1996 long lumStride, long chromStride, long srcStride) |
1997 { | 1997 { |
1998 long y; | 1998 long y; |
1999 const long chromWidth= width>>1; | 1999 const x86_reg chromWidth= width>>1; |
2000 for (y=0; y<height; y+=2) | 2000 for (y=0; y<height; y+=2) |
2001 { | 2001 { |
2002 #if HAVE_MMX | 2002 #if HAVE_MMX |
2003 __asm__ volatile( | 2003 __asm__ volatile( |
2004 "xor %%"REG_a", %%"REG_a" \n\t" | 2004 "xor %%"REG_a", %%"REG_a" \n\t" |
2121 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | 2121 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
2122 long width, long height, | 2122 long width, long height, |
2123 long lumStride, long chromStride, long srcStride) | 2123 long lumStride, long chromStride, long srcStride) |
2124 { | 2124 { |
2125 long y; | 2125 long y; |
2126 const long chromWidth= width>>1; | 2126 const x86_reg chromWidth= width>>1; |
2127 #if HAVE_MMX | 2127 #if HAVE_MMX |
2128 for (y=0; y<height-2; y+=2) | 2128 for (y=0; y<height-2; y+=2) |
2129 { | 2129 { |
2130 long i; | 2130 long i; |
2131 for (i=0; i<2; i++) | 2131 for (i=0; i<2; i++) |
2194 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" | 2194 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" |
2195 | 2195 |
2196 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" | 2196 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" |
2197 "add $8, %%"REG_a" \n\t" | 2197 "add $8, %%"REG_a" \n\t" |
2198 " js 1b \n\t" | 2198 " js 1b \n\t" |
2199 : : "r" (src+width*3), "r" (ydst+width), "g" (-width) | 2199 : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width) |
2200 : "%"REG_a, "%"REG_d | 2200 : "%"REG_a, "%"REG_d |
2201 ); | 2201 ); |
2202 ydst += lumStride; | 2202 ydst += lumStride; |
2203 src += srcStride; | 2203 src += srcStride; |
2204 } | 2204 } |
2438 "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t" | 2438 "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t" |
2439 "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t" | 2439 "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t" |
2440 "add $16, %%"REG_a" \n\t" | 2440 "add $16, %%"REG_a" \n\t" |
2441 "cmp %3, %%"REG_a" \n\t" | 2441 "cmp %3, %%"REG_a" \n\t" |
2442 " jb 1b \n\t" | 2442 " jb 1b \n\t" |
2443 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) | 2443 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) |
2444 : "memory", "%"REG_a"" | 2444 : "memory", "%"REG_a"" |
2445 ); | 2445 ); |
2446 #else | 2446 #else |
2447 __asm__( | 2447 __asm__( |
2448 "xor %%"REG_a", %%"REG_a" \n\t" | 2448 "xor %%"REG_a", %%"REG_a" \n\t" |
2464 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t" | 2464 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t" |
2465 MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t" | 2465 MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t" |
2466 "add $16, %%"REG_a" \n\t" | 2466 "add $16, %%"REG_a" \n\t" |
2467 "cmp %3, %%"REG_a" \n\t" | 2467 "cmp %3, %%"REG_a" \n\t" |
2468 " jb 1b \n\t" | 2468 " jb 1b \n\t" |
2469 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) | 2469 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) |
2470 : "memory", "%"REG_a | 2470 : "memory", "%"REG_a |
2471 ); | 2471 ); |
2472 #endif | 2472 #endif |
2473 for (w= (width&(~15)); w < width; w++) | 2473 for (w= (width&(~15)); w < width; w++) |
2474 { | 2474 { |
2499 uint8_t *dst1, uint8_t *dst2, | 2499 uint8_t *dst1, uint8_t *dst2, |
2500 long width, long height, | 2500 long width, long height, |
2501 long srcStride1, long srcStride2, | 2501 long srcStride1, long srcStride2, |
2502 long dstStride1, long dstStride2) | 2502 long dstStride1, long dstStride2) |
2503 { | 2503 { |
2504 long y,x,w,h; | 2504 x86_reg y; |
| | 2505 long x,w,h; |
2505 w=width/2; h=height/2; | 2506 w=width/2; h=height/2; |
2506 #if HAVE_MMX | 2507 #if HAVE_MMX |
2507 __asm__ volatile( | 2508 __asm__ volatile( |
2508 PREFETCH" %0 \n\t" | 2509 PREFETCH" %0 \n\t" |
2509 PREFETCH" %1 \n\t" | 2510 PREFETCH" %1 \n\t" |
2602 uint8_t *dst, | 2603 uint8_t *dst, |
2603 long width, long height, | 2604 long width, long height, |
2604 long srcStride1, long srcStride2, | 2605 long srcStride1, long srcStride2, |
2605 long srcStride3, long dstStride) | 2606 long srcStride3, long dstStride) |
2606 { | 2607 { |
2607 long y,x,w,h; | 2608 x86_reg x; |
| | 2609 long y,w,h; |
2608 w=width/2; h=height; | 2610 w=width/2; h=height; |
2609 for (y=0;y<h;y++){ | 2611 for (y=0;y<h;y++){ |
2610 const uint8_t* yp=src1+srcStride1*y; | 2612 const uint8_t* yp=src1+srcStride1*y; |
2611 const uint8_t* up=src2+srcStride2*(y>>2); | 2613 const uint8_t* up=src2+srcStride2*(y>>2); |
2612 const uint8_t* vp=src3+srcStride3*(y>>2); | 2614 const uint8_t* vp=src3+srcStride3*(y>>2); |