mplayer.hg: changeset 6492:e7635c03910f
sync with mplayer xp
- partial yvu9 support (copy only)
- rgb 15/16 -> 24/32 converters
- int->unsigned changes
author:   arpi
date:     Sat, 22 Jun 2002 08:49:45 +0000
parents:  920796b6c7b1
children: c4109cc97fe0
files:    postproc/rgb2rgb.c postproc/rgb2rgb.h postproc/rgb2rgb_template.c postproc/swscale.c postproc/swscale_template.c postproc/yuv2rgb.c postproc/yuv2rgb_mlib.c postproc/yuv2rgb_template.c
diffstat: 8 files changed, 1123 insertions(+), 287 deletions(-)
--- a/postproc/rgb2rgb.c Sat Jun 22 08:47:56 2002 +0000 +++ b/postproc/rgb2rgb.c Sat Jun 22 08:49:45 2002 +0000 @@ -20,6 +20,8 @@ #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit #ifdef CAN_COMPILE_X86_ASM +static const uint64_t mmx_null __attribute__((aligned(8))) = 0x0000000000000000ULL; +static const uint64_t mmx_one __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFULL; static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL; static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL; @@ -35,6 +37,11 @@ static const uint64_t mask15b __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */ static const uint64_t mask15rg __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */ static const uint64_t mask15s __attribute__((aligned(8))) = 0xFFE0FFE0FFE0FFE0ULL; +static const uint64_t mask15g __attribute__((aligned(8))) = 0x03E003E003E003E0ULL; +static const uint64_t mask15r __attribute__((aligned(8))) = 0x7C007C007C007C00ULL; +#define mask16b mask15b +static const uint64_t mask16g __attribute__((aligned(8))) = 0x07E007E007E007E0ULL; +static const uint64_t mask16r __attribute__((aligned(8))) = 0xF800F800F800F800ULL; static const uint64_t red_16mask __attribute__((aligned(8))) = 0x0000f8000000f800ULL; static const uint64_t green_16mask __attribute__((aligned(8)))= 0x000007e0000007e0ULL; static const uint64_t blue_16mask __attribute__((aligned(8))) = 0x0000001f0000001fULL; @@ -137,10 +144,68 @@ else if(gCpuCaps.hasMMX) rgb24to32_MMX(src, dst, src_size); else +#endif rgb24to32_C(src, dst, src_size); -#else - rgb24to32_C(src, dst, src_size); +} + +void rgb15to24(const uint8_t *src,uint8_t *dst,unsigned src_size) +{ +#ifdef CAN_COMPILE_X86_ASM + // ordered per speed fasterst first + if(gCpuCaps.hasMMX2) + rgb15to24_MMX2(src, dst, src_size); + else if(gCpuCaps.has3DNow) + rgb15to24_3DNow(src, dst, src_size); + else if(gCpuCaps.hasMMX) + rgb15to24_MMX(src, dst, src_size); + else +#endif + rgb15to24_C(src, dst, src_size); +} + +void rgb16to24(const uint8_t *src,uint8_t *dst,unsigned src_size) +{ +#ifdef CAN_COMPILE_X86_ASM + // ordered per speed fasterst first + if(gCpuCaps.hasMMX2) + rgb16to24_MMX2(src, dst, src_size); + else if(gCpuCaps.has3DNow) + rgb16to24_3DNow(src, dst, src_size); + else if(gCpuCaps.hasMMX) + rgb16to24_MMX(src, dst, src_size); + else #endif + rgb16to24_C(src, dst, src_size); +} + +void rgb15to32(const uint8_t *src,uint8_t *dst,unsigned src_size) +{ +#ifdef CAN_COMPILE_X86_ASM + // ordered per speed fasterst first + if(gCpuCaps.hasMMX2) + rgb15to32_MMX2(src, dst, src_size); + else if(gCpuCaps.has3DNow) + rgb15to32_3DNow(src, dst, src_size); + else if(gCpuCaps.hasMMX) + rgb15to32_MMX(src, dst, src_size); + else +#endif + rgb15to32_C(src, dst, src_size); +} + +void rgb16to32(const uint8_t *src,uint8_t *dst,unsigned src_size) +{ +#ifdef CAN_COMPILE_X86_ASM + // ordered per speed fasterst first + if(gCpuCaps.hasMMX2) + rgb16to32_MMX2(src, dst, src_size); + else if(gCpuCaps.has3DNow) + rgb16to32_3DNow(src, dst, src_size); + else if(gCpuCaps.hasMMX) + rgb16to32_MMX(src, dst, src_size); + else +#endif + rgb16to32_C(src, dst, src_size); } void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size) @@ -154,10 +219,8 @@ else if(gCpuCaps.hasMMX) rgb32to24_MMX(src, dst, src_size); else +#endif rgb32to24_C(src, dst, src_size); -#else - rgb32to24_C(src, dst, src_size); -#endif } /* @@ -177,10 +240,8 
@@ else if(gCpuCaps.hasMMX) rgb15to16_MMX(src, dst, src_size); else +#endif rgb15to16_C(src, dst, src_size); -#else - rgb15to16_C(src, dst, src_size); -#endif } /** @@ -242,10 +303,8 @@ else if(gCpuCaps.hasMMX) rgb32to16_MMX(src, dst, src_size); else +#endif rgb32to16_C(src, dst, src_size); -#else - rgb32to16_C(src, dst, src_size); -#endif } void rgb32to15(const uint8_t *src, uint8_t *dst, unsigned src_size) @@ -259,10 +318,8 @@ else if(gCpuCaps.hasMMX) rgb32to15_MMX(src, dst, src_size); else +#endif rgb32to15_C(src, dst, src_size); -#else - rgb32to15_C(src, dst, src_size); -#endif } void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size) @@ -276,10 +333,8 @@ else if(gCpuCaps.hasMMX) rgb24to16_MMX(src, dst, src_size); else +#endif rgb24to16_C(src, dst, src_size); -#else - rgb24to16_C(src, dst, src_size); -#endif } void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size) @@ -293,10 +348,8 @@ else if(gCpuCaps.hasMMX) rgb24to15_MMX(src, dst, src_size); else +#endif rgb24to15_C(src, dst, src_size); -#else - rgb24to15_C(src, dst, src_size); -#endif } /** @@ -330,10 +383,8 @@ else if(gCpuCaps.hasMMX) rgb32tobgr32_MMX(src, dst, src_size); else +#endif rgb32tobgr32_C(src, dst, src_size); -#else - rgb32tobgr32_C(src, dst, src_size); -#endif } void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size) @@ -347,10 +398,8 @@ else if(gCpuCaps.hasMMX) rgb24tobgr24_MMX(src, dst, src_size); else +#endif rgb24tobgr24_C(src, dst, src_size); -#else - rgb24tobgr24_C(src, dst, src_size); -#endif } /** @@ -371,10 +420,8 @@ else if(gCpuCaps.hasMMX) yv12toyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); else +#endif yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); -#else - yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); -#endif } /** @@ -394,10 +441,8 @@ else if(gCpuCaps.hasMMX) yuv422ptoyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); else +#endif yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); -#else - yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); -#endif } /** @@ -418,10 +463,8 @@ else if(gCpuCaps.hasMMX) yuy2toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); else +#endif yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); -#else - yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); -#endif } /** @@ -488,14 +531,13 @@ else if(gCpuCaps.hasMMX) rgb24toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); else +#endif rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); -#else - rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); -#endif } void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst, - int width, int height, int src1Stride, int src2Stride, int dstStride) + unsigned width, unsigned height, unsigned src1Stride, + unsigned src2Stride, unsigned dstStride) { #ifdef CAN_COMPILE_X86_ASM // ordered per speed fasterst first @@ -506,8 +548,6 @@ else if(gCpuCaps.hasMMX) interleaveBytes_MMX(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride); else - interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride); -#else +#endif interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, 
dstStride); -#endif }
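Note: the recurring change throughout rgb2rgb.c above moves the `#endif` so that the plain C implementation is written once, as the fall-through of the capability chain, instead of being duplicated in a separate `#else` branch. A minimal sketch of the resulting dispatch shape, using the real names from rgb2rgb.c (gCpuCaps and the _MMX2/_3DNow/_MMX/_C variants); it compiles only inside that file's context and is illustrative, not a replacement for the committed code:

```c
#include <stdint.h>

void rgb24to32(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
	/* ordered fastest first; each branch calls a template-generated variant */
	if(gCpuCaps.hasMMX2)       rgb24to32_MMX2 (src, dst, src_size);
	else if(gCpuCaps.has3DNow) rgb24to32_3DNow(src, dst, src_size);
	else if(gCpuCaps.hasMMX)   rgb24to32_MMX  (src, dst, src_size);
	else
#endif
	/* reached as the final "else" above, or directly when asm is compiled out */
	rgb24to32_C(src, dst, src_size);
}
```

Having a single call site for the C path means a later argument or behavior change cannot silently diverge between the two preprocessor branches.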
--- a/postproc/rgb2rgb.h Sat Jun 22 08:47:56 2002 +0000 +++ b/postproc/rgb2rgb.h Sat Jun 22 08:49:45 2002 +0000 @@ -10,12 +10,16 @@ #define RGB2RGB_INCLUDED extern void rgb24to32(const uint8_t *src,uint8_t *dst,unsigned src_size); +extern void rgb24to16(const uint8_t *src,uint8_t *dst,unsigned src_size); +extern void rgb24to15(const uint8_t *src,uint8_t *dst,unsigned src_size); extern void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size); -extern void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size); extern void rgb32to16(const uint8_t *src,uint8_t *dst,unsigned src_size); extern void rgb32to15(const uint8_t *src,uint8_t *dst,unsigned src_size); -extern void rgb24to16(const uint8_t *src,uint8_t *dst,unsigned src_size); -extern void rgb24to15(const uint8_t *src,uint8_t *dst,unsigned src_size); +extern void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size); +extern void rgb15to24(const uint8_t *src,uint8_t *dst,unsigned src_size); +extern void rgb15to32(const uint8_t *src,uint8_t *dst,unsigned src_size); +extern void rgb16to24(const uint8_t *src,uint8_t *dst,unsigned src_size); +extern void rgb16to32(const uint8_t *src,uint8_t *dst,unsigned src_size); extern void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size); extern void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size); @@ -39,7 +43,8 @@ unsigned int lumStride, unsigned int chromStride, unsigned int srcStride); extern void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst, - int width, int height, int src1Stride, int src2Stride, int dstStride); + unsigned width, unsigned height, unsigned src1Stride, + unsigned src2Stride, unsigned dstStride); #define MODE_RGB 0x1 @@ -47,11 +52,11 @@ typedef void (* yuv2rgb_fun) (uint8_t * image, uint8_t * py, uint8_t * pu, uint8_t * pv, - int h_size, int v_size, - int rgb_stride, int y_stride, int uv_stride); + unsigned h_size, unsigned v_size, + unsigned rgb_stride, unsigned y_stride, unsigned uv_stride); extern yuv2rgb_fun yuv2rgb; -void yuv2rgb_init (int bpp, int mode); +void yuv2rgb_init (unsigned bpp, int mode); #endif
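The header now exports the 15/16 bpp to 24/32 bpp converters added in this changeset. In all of them src_size counts bytes of the source buffer (the swscale.c wrappers below pass srcW<<1 for 16bpp input), so a caller converting one row looks like this sketch; convert_row_16_to_32 and its parameters are illustrative names, rgb16to32 is the real entry point:

```c
#include <stdint.h>
#include "rgb2rgb.h"

/* Convert one row of w pixels from 16bpp (2 bytes/px) to 32bpp (4 bytes/px).
 * The destination must provide w*4 bytes. */
void convert_row_16_to_32(const uint8_t *row16, uint8_t *row32, unsigned w)
{
	rgb16to32(row16, row32, w * 2);  /* src_size is in source bytes */
}
```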
--- a/postproc/rgb2rgb_template.c Sat Jun 22 08:47:56 2002 +0000 +++ b/postproc/rgb2rgb_template.c Sat Jun 22 08:49:45 2002 +0000 @@ -8,6 +8,13 @@ * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL) */ +#include <stddef.h> +#include <inttypes.h> /* for __WORDSIZE */ + +#ifndef __WORDSIZE +#warning You have misconfigured system and probably will lose performance! +#endif + #undef PREFETCH #undef MOVNTQ #undef EMMS @@ -56,13 +63,13 @@ const uint8_t *s = src; const uint8_t *end; #ifdef HAVE_MMX - const uint8_t *mm_end; + uint8_t *mm_end; #endif end = s + src_size; #ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 23; __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); + mm_end = (uint8_t*)((((unsigned long)end)/24)*24); while(s < mm_end) { __asm __volatile( @@ -107,12 +114,12 @@ const uint8_t *s = src; const uint8_t *end; #ifdef HAVE_MMX - const uint8_t *mm_end; + uint8_t *mm_end; #endif end = s + src_size; #ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 31; + mm_end = (uint8_t*)((((unsigned long)end)/32)*32); while(s < mm_end) { __asm __volatile( @@ -186,15 +193,16 @@ */ static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size) { + register const uint8_t* s=src; + register uint8_t* d=dst; + register const uint8_t *end; + uint8_t *mm_end; + end = s + src_size; #ifdef HAVE_MMX - register int offs=15-src_size; - register const char* s=src-offs; - register char* d=dst-offs; - __asm __volatile(PREFETCH" %0"::"m"(*(s+offs))); - __asm __volatile( - "movq %0, %%mm4\n\t" - ::"m"(mask15s)); - while(offs<0) + __asm __volatile(PREFETCH" %0"::"m"(*s)); + __asm __volatile("movq %0, %%mm4"::"m"(mask15s)); + mm_end = (uint8_t*)((((unsigned long)end)/16)*16); + while(s<mm_end) { __asm __volatile( PREFETCH" 32%1\n\t" @@ -208,40 +216,28 @@ "paddw %%mm3, %%mm2\n\t" MOVNTQ" %%mm0, %0\n\t" MOVNTQ" %%mm2, 8%0" - :"=m"(*(d+offs)) - :"m"(*(s+offs)) + :"=m"(*d) + :"m"(*s) ); - offs+=16; + d+=16; + s+=16; } __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory"); -#else -#if 0 - const uint16_t *s1=( uint16_t * )src; - uint16_t *d1=( uint16_t * )dst; - uint16_t *e=((uint8_t *)s1)+src_size; - while( s1<e ){ - register int x=*( s1++ ); - /* rrrrrggggggbbbbb - 0rrrrrgggggbbbbb - 0111 1111 1110 0000=0x7FE0 - 00000000000001 1111=0x001F */ - *( d1++ )=( x&0x001F )|( ( x&0x7FE0 )<<1 ); - } -#else - const unsigned *s1=( unsigned * )src; - unsigned *d1=( unsigned * )dst; - int i; - int size= src_size>>2; - for(i=0; i<size; i++) - { - register int x= s1[i]; -// d1[i] = x + (x&0x7FE07FE0); //faster but need msbit =0 which might not allways be true - d1[i] = (x&0x7FFF7FFF) + (x&0x7FE07FE0); - - } #endif -#endif + mm_end = (uint8_t*)((((unsigned long)end)/4)*4); + while(s < mm_end) + { + register unsigned x= *((uint32_t *)s); + *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); + d+=4; + s+=4; + } + if(s < end) + { + register unsigned short x= *((uint16_t *)s); + *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); + } } static inline void RENAME(bgr24torgb24)(const uint8_t *src, uint8_t *dst, unsigned src_size) @@ -257,17 +253,20 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) { + const uint8_t *s = src; + const uint8_t *end; #ifdef HAVE_MMX - const uint8_t *s = src; - const uint8_t *end,*mm_end; + const uint8_t *mm_end; +#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; - mm_end = end - 15; +#ifdef HAVE_MMX __asm __volatile(PREFETCH" 
%0"::"m"(*src):"memory"); __asm __volatile( "movq %0, %%mm7\n\t" "movq %1, %%mm6\n\t" ::"m"(red_16mask),"m"(green_16mask)); + mm_end = (uint8_t*)((((unsigned long)end)/16)*16); while(s < mm_end) { __asm __volatile( @@ -303,43 +302,35 @@ d += 4; s += 16; } + __asm __volatile(SFENCE:::"memory"); + __asm __volatile(EMMS:::"memory"); +#endif while(s < end) { const int b= *s++; const int g= *s++; const int r= *s++; - s++; *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); + s++; } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#else - unsigned j,i,num_pixels=src_size/4; - uint16_t *d = (uint16_t *)dst; - for(i=0,j=0; j<num_pixels; i+=4,j++) - { - const int b= src[i+0]; - const int g= src[i+1]; - const int r= src[i+2]; - - d[j]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); - } -#endif } static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) { + const uint8_t *s = src; + const uint8_t *end; #ifdef HAVE_MMX - const uint8_t *s = src; - const uint8_t *end,*mm_end; + const uint8_t *mm_end; +#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; - mm_end = end - 15; +#ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm __volatile( "movq %0, %%mm7\n\t" "movq %1, %%mm6\n\t" ::"m"(red_15mask),"m"(green_15mask)); + mm_end = (uint8_t*)((((unsigned long)end)/16)*16); while(s < mm_end) { __asm __volatile( @@ -375,43 +366,35 @@ d += 4; s += 16; } + __asm __volatile(SFENCE:::"memory"); + __asm __volatile(EMMS:::"memory"); +#endif while(s < end) { const int b= *s++; const int g= *s++; const int r= *s++; + *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); s++; - *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#else - unsigned j,i,num_pixels=src_size/4; - uint16_t *d = (uint16_t *)dst; - for(i=0,j=0; j<num_pixels; i+=4,j++) - { - const int b= src[i+0]; - const int g= src[i+1]; - const int r= src[i+2]; - - d[j]= (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); - } -#endif } static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) { + const uint8_t *s = src; + const uint8_t *end; #ifdef HAVE_MMX - const uint8_t *s = src; - const uint8_t *end,*mm_end; + const uint8_t *mm_end; +#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; - mm_end = end - 11; +#ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm __volatile( "movq %0, %%mm7\n\t" "movq %1, %%mm6\n\t" ::"m"(red_16mask),"m"(green_16mask)); + mm_end = (uint8_t*)((((unsigned long)end)/16)*16); while(s < mm_end) { __asm __volatile( @@ -447,6 +430,9 @@ d += 4; s += 12; } + __asm __volatile(SFENCE:::"memory"); + __asm __volatile(EMMS:::"memory"); +#endif while(s < end) { const int b= *s++; @@ -454,35 +440,24 @@ const int r= *s++; *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#else - unsigned j,i,num_pixels=src_size/3; - uint16_t *d = (uint16_t *)dst; - for(i=0,j=0; j<num_pixels; i+=3,j++) - { - const int b= src[i+0]; - const int g= src[i+1]; - const int r= src[i+2]; - - d[j]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); - } -#endif } static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) { + const uint8_t *s = src; + const uint8_t *end; #ifdef HAVE_MMX - const uint8_t *s = src; - const uint8_t *end,*mm_end; + const uint8_t *mm_end; +#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; - mm_end = end -11; +#ifdef HAVE_MMX __asm __volatile(PREFETCH" 
%0"::"m"(*src):"memory"); __asm __volatile( "movq %0, %%mm7\n\t" "movq %1, %%mm6\n\t" ::"m"(red_15mask),"m"(green_15mask)); + mm_end = (uint8_t*)((((unsigned long)end)/16)*16); while(s < mm_end) { __asm __volatile( @@ -518,6 +493,9 @@ d += 4; s += 12; } + __asm __volatile(SFENCE:::"memory"); + __asm __volatile(EMMS:::"memory"); +#endif while(s < end) { const int b= *s++; @@ -525,25 +503,448 @@ const int r= *s++; *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); } +} + +/* + I use here less accurate approximation by simply + left-shifting the input + value and filling the low order bits with + zeroes. This method improves png's + compression but this scheme cannot reproduce white exactly, since it does not + generate an all-ones maximum value; the net effect is to darken the + image slightly. + + The better method should be "left bit replication": + + 4 3 2 1 0 + --------- + 1 1 0 1 1 + + 7 6 5 4 3 2 1 0 + ---------------- + 1 1 0 1 1 1 1 0 + |=======| |===| + | Leftmost Bits Repeated to Fill Open Bits + | + Original Bits +*/ +static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, unsigned src_size) +{ + const uint16_t *end; +#ifdef HAVE_MMX + const uint16_t *mm_end; +#endif + uint8_t *d = (uint8_t *)dst; + const uint16_t *s = (uint16_t *)src; + end = s + src_size/2; +#ifdef HAVE_MMX + __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); + mm_end = (uint16_t*)((((unsigned long)end)/8)*8); + while(s < mm_end) + { + __asm __volatile( + PREFETCH" 32%1\n\t" + "movq %1, %%mm0\n\t" + "movq %1, %%mm1\n\t" + "movq %1, %%mm2\n\t" + "pand %2, %%mm0\n\t" + "pand %3, %%mm1\n\t" + "pand %4, %%mm2\n\t" + "psllq $3, %%mm0\n\t" + "psrlq $2, %%mm1\n\t" + "psrlq $7, %%mm2\n\t" + "movq %%mm0, %%mm3\n\t" + "movq %%mm1, %%mm4\n\t" + "movq %%mm2, %%mm5\n\t" + "punpcklwd %5, %%mm0\n\t" + "punpcklwd %5, %%mm1\n\t" + "punpcklwd %5, %%mm2\n\t" + "punpckhwd %5, %%mm3\n\t" + "punpckhwd %5, %%mm4\n\t" + "punpckhwd %5, %%mm5\n\t" + "psllq $8, %%mm1\n\t" + "psllq $16, %%mm2\n\t" + "por %%mm1, %%mm0\n\t" + "por %%mm2, %%mm0\n\t" + "psllq $8, %%mm4\n\t" + "psllq $16, %%mm5\n\t" + "por %%mm4, %%mm3\n\t" + "por %%mm5, %%mm3\n\t" + + "movq %%mm0, %%mm6\n\t" + "movq %%mm3, %%mm7\n\t" + + "movq 8%1, %%mm0\n\t" + "movq 8%1, %%mm1\n\t" + "movq 8%1, %%mm2\n\t" + "pand %2, %%mm0\n\t" + "pand %3, %%mm1\n\t" + "pand %4, %%mm2\n\t" + "psllq $3, %%mm0\n\t" + "psrlq $2, %%mm1\n\t" + "psrlq $7, %%mm2\n\t" + "movq %%mm0, %%mm3\n\t" + "movq %%mm1, %%mm4\n\t" + "movq %%mm2, %%mm5\n\t" + "punpcklwd %5, %%mm0\n\t" + "punpcklwd %5, %%mm1\n\t" + "punpcklwd %5, %%mm2\n\t" + "punpckhwd %5, %%mm3\n\t" + "punpckhwd %5, %%mm4\n\t" + "punpckhwd %5, %%mm5\n\t" + "psllq $8, %%mm1\n\t" + "psllq $16, %%mm2\n\t" + "por %%mm1, %%mm0\n\t" + "por %%mm2, %%mm0\n\t" + "psllq $8, %%mm4\n\t" + "psllq $16, %%mm5\n\t" + "por %%mm4, %%mm3\n\t" + "por %%mm5, %%mm3\n\t" + + :"=m"(*d) + :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) + :"memory"); + /* Borrowed 32 to 24 */ + __asm __volatile( + "movq %%mm0, %%mm4\n\t" + "movq %%mm3, %%mm5\n\t" + "movq %%mm6, %%mm0\n\t" + "movq %%mm7, %%mm1\n\t" + + "movq %%mm4, %%mm6\n\t" + "movq %%mm5, %%mm7\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + + "psrlq $8, %%mm2\n\t" + "psrlq $8, %%mm3\n\t" + "psrlq $8, %%mm6\n\t" + "psrlq $8, %%mm7\n\t" + "pand %2, %%mm0\n\t" + "pand %2, %%mm1\n\t" + "pand %2, %%mm4\n\t" + "pand %2, %%mm5\n\t" + "pand %3, %%mm2\n\t" + "pand %3, %%mm3\n\t" + "pand %3, %%mm6\n\t" + "pand %3, %%mm7\n\t" + "por %%mm2, %%mm0\n\t" + "por %%mm3, %%mm1\n\t" + "por %%mm6, 
%%mm4\n\t" + "por %%mm7, %%mm5\n\t" + + "movq %%mm1, %%mm2\n\t" + "movq %%mm4, %%mm3\n\t" + "psllq $48, %%mm2\n\t" + "psllq $32, %%mm3\n\t" + "pand %4, %%mm2\n\t" + "pand %5, %%mm3\n\t" + "por %%mm2, %%mm0\n\t" + "psrlq $16, %%mm1\n\t" + "psrlq $32, %%mm4\n\t" + "psllq $16, %%mm5\n\t" + "por %%mm3, %%mm1\n\t" + "pand %6, %%mm5\n\t" + "por %%mm5, %%mm4\n\t" + + MOVNTQ" %%mm0, %0\n\t" + MOVNTQ" %%mm1, 8%0\n\t" + MOVNTQ" %%mm4, 16%0" + + :"=m"(*d) + :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) + :"memory"); + d += 24; + s += 8; + } __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory"); -#else - unsigned j,i,num_pixels=src_size/3; - uint16_t *d = (uint16_t *)dst; - for(i=0,j=0; j<num_pixels; i+=3,j++) +#endif + while(s < end) + { + register uint16_t bgr; + bgr = *s++; + *d++ = (bgr&0x1F)<<3; + *d++ = (bgr&0x3E0)>>2; + *d++ = (bgr&0x7C00)>>7; + } +} + +static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, unsigned src_size) +{ + const uint16_t *end; +#ifdef HAVE_MMX + const uint16_t *mm_end; +#endif + uint8_t *d = (uint8_t *)dst; + const uint16_t *s = (const uint16_t *)src; + end = s + src_size/2; +#ifdef HAVE_MMX + __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); + mm_end = (uint16_t*)((((unsigned long)end)/8)*8); + while(s < mm_end) { - const int b= src[i+0]; - const int g= src[i+1]; - const int r= src[i+2]; + __asm __volatile( + PREFETCH" 32%1\n\t" + "movq %1, %%mm0\n\t" + "movq %1, %%mm1\n\t" + "movq %1, %%mm2\n\t" + "pand %2, %%mm0\n\t" + "pand %3, %%mm1\n\t" + "pand %4, %%mm2\n\t" + "psllq $3, %%mm0\n\t" + "psrlq $3, %%mm1\n\t" + "psrlq $8, %%mm2\n\t" + "movq %%mm0, %%mm3\n\t" + "movq %%mm1, %%mm4\n\t" + "movq %%mm2, %%mm5\n\t" + "punpcklwd %5, %%mm0\n\t" + "punpcklwd %5, %%mm1\n\t" + "punpcklwd %5, %%mm2\n\t" + "punpckhwd %5, %%mm3\n\t" + "punpckhwd %5, %%mm4\n\t" + "punpckhwd %5, %%mm5\n\t" + "psllq $8, %%mm1\n\t" + "psllq $16, %%mm2\n\t" + "por %%mm1, %%mm0\n\t" + "por %%mm2, %%mm0\n\t" + "psllq $8, %%mm4\n\t" + "psllq $16, %%mm5\n\t" + "por %%mm4, %%mm3\n\t" + "por %%mm5, %%mm3\n\t" + + "movq %%mm0, %%mm6\n\t" + "movq %%mm3, %%mm7\n\t" + + "movq 8%1, %%mm0\n\t" + "movq 8%1, %%mm1\n\t" + "movq 8%1, %%mm2\n\t" + "pand %2, %%mm0\n\t" + "pand %3, %%mm1\n\t" + "pand %4, %%mm2\n\t" + "psllq $3, %%mm0\n\t" + "psrlq $3, %%mm1\n\t" + "psrlq $8, %%mm2\n\t" + "movq %%mm0, %%mm3\n\t" + "movq %%mm1, %%mm4\n\t" + "movq %%mm2, %%mm5\n\t" + "punpcklwd %5, %%mm0\n\t" + "punpcklwd %5, %%mm1\n\t" + "punpcklwd %5, %%mm2\n\t" + "punpckhwd %5, %%mm3\n\t" + "punpckhwd %5, %%mm4\n\t" + "punpckhwd %5, %%mm5\n\t" + "psllq $8, %%mm1\n\t" + "psllq $16, %%mm2\n\t" + "por %%mm1, %%mm0\n\t" + "por %%mm2, %%mm0\n\t" + "psllq $8, %%mm4\n\t" + "psllq $16, %%mm5\n\t" + "por %%mm4, %%mm3\n\t" + "por %%mm5, %%mm3\n\t" + :"=m"(*d) + :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) + :"memory"); + /* Borrowed 32 to 24 */ + __asm __volatile( + "movq %%mm0, %%mm4\n\t" + "movq %%mm3, %%mm5\n\t" + "movq %%mm6, %%mm0\n\t" + "movq %%mm7, %%mm1\n\t" + + "movq %%mm4, %%mm6\n\t" + "movq %%mm5, %%mm7\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + + "psrlq $8, %%mm2\n\t" + "psrlq $8, %%mm3\n\t" + "psrlq $8, %%mm6\n\t" + "psrlq $8, %%mm7\n\t" + "pand %2, %%mm0\n\t" + "pand %2, %%mm1\n\t" + "pand %2, %%mm4\n\t" + "pand %2, %%mm5\n\t" + "pand %3, %%mm2\n\t" + "pand %3, %%mm3\n\t" + "pand %3, %%mm6\n\t" + "pand %3, %%mm7\n\t" + "por %%mm2, %%mm0\n\t" + "por %%mm3, %%mm1\n\t" + "por %%mm6, %%mm4\n\t" + "por %%mm7, %%mm5\n\t" + + "movq %%mm1, 
%%mm2\n\t" + "movq %%mm4, %%mm3\n\t" + "psllq $48, %%mm2\n\t" + "psllq $32, %%mm3\n\t" + "pand %4, %%mm2\n\t" + "pand %5, %%mm3\n\t" + "por %%mm2, %%mm0\n\t" + "psrlq $16, %%mm1\n\t" + "psrlq $32, %%mm4\n\t" + "psllq $16, %%mm5\n\t" + "por %%mm3, %%mm1\n\t" + "pand %6, %%mm5\n\t" + "por %%mm5, %%mm4\n\t" + + MOVNTQ" %%mm0, %0\n\t" + MOVNTQ" %%mm1, 8%0\n\t" + MOVNTQ" %%mm4, 16%0" + + :"=m"(*d) + :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) + :"memory"); + d += 24; + s += 8; + } + __asm __volatile(SFENCE:::"memory"); + __asm __volatile(EMMS:::"memory"); +#endif + while(s < end) + { + register uint16_t bgr; + bgr = *s++; + *d++ = (bgr&0x1F)<<3; + *d++ = (bgr&0x7E0)>>3; + *d++ = (bgr&0xF800)>>8; + } +} - d[j]= (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); +static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, unsigned src_size) +{ + const uint16_t *end; +#ifdef HAVE_MMX + const uint16_t *mm_end; +#endif + uint8_t *d = (uint8_t *)dst; + const uint16_t *s = (const uint16_t *)src; + end = s + src_size/2; +#ifdef HAVE_MMX + __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); + __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); + mm_end = (uint16_t*)((((unsigned long)end)/4)*4); + while(s < mm_end) + { + __asm __volatile( + PREFETCH" 32%1\n\t" + "movq %1, %%mm0\n\t" + "movq %1, %%mm1\n\t" + "movq %1, %%mm2\n\t" + "pand %2, %%mm0\n\t" + "pand %3, %%mm1\n\t" + "pand %4, %%mm2\n\t" + "psllq $3, %%mm0\n\t" + "psrlq $2, %%mm1\n\t" + "psrlq $7, %%mm2\n\t" + "movq %%mm0, %%mm3\n\t" + "movq %%mm1, %%mm4\n\t" + "movq %%mm2, %%mm5\n\t" + "punpcklwd %%mm7, %%mm0\n\t" + "punpcklwd %%mm7, %%mm1\n\t" + "punpcklwd %%mm7, %%mm2\n\t" + "punpckhwd %%mm7, %%mm3\n\t" + "punpckhwd %%mm7, %%mm4\n\t" + "punpckhwd %%mm7, %%mm5\n\t" + "psllq $8, %%mm1\n\t" + "psllq $16, %%mm2\n\t" + "por %%mm1, %%mm0\n\t" + "por %%mm2, %%mm0\n\t" + "psllq $8, %%mm4\n\t" + "psllq $16, %%mm5\n\t" + "por %%mm4, %%mm3\n\t" + "por %%mm5, %%mm3\n\t" + MOVNTQ" %%mm0, %0\n\t" + MOVNTQ" %%mm3, 8%0\n\t" + :"=m"(*d) + :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) + :"memory"); + d += 16; + s += 4; + } + __asm __volatile(SFENCE:::"memory"); + __asm __volatile(EMMS:::"memory"); +#endif + while(s < end) + { + register uint16_t bgr; + bgr = *s++; + *d++ = (bgr&0x1F)<<3; + *d++ = (bgr&0x3E0)>>2; + *d++ = (bgr&0x7C00)>>7; + *d++ = 0; } +} + +static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, unsigned src_size) +{ + const uint16_t *end; +#ifdef HAVE_MMX + const uint16_t *mm_end; #endif + uint8_t *d = (uint8_t *)dst; + const uint16_t *s = (uint16_t *)src; + end = s + src_size/2; +#ifdef HAVE_MMX + __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); + __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); + mm_end = (uint16_t*)((((unsigned long)end)/4)*4); + while(s < mm_end) + { + __asm __volatile( + PREFETCH" 32%1\n\t" + "movq %1, %%mm0\n\t" + "movq %1, %%mm1\n\t" + "movq %1, %%mm2\n\t" + "pand %2, %%mm0\n\t" + "pand %3, %%mm1\n\t" + "pand %4, %%mm2\n\t" + "psllq $3, %%mm0\n\t" + "psrlq $3, %%mm1\n\t" + "psrlq $8, %%mm2\n\t" + "movq %%mm0, %%mm3\n\t" + "movq %%mm1, %%mm4\n\t" + "movq %%mm2, %%mm5\n\t" + "punpcklwd %%mm7, %%mm0\n\t" + "punpcklwd %%mm7, %%mm1\n\t" + "punpcklwd %%mm7, %%mm2\n\t" + "punpckhwd %%mm7, %%mm3\n\t" + "punpckhwd %%mm7, %%mm4\n\t" + "punpckhwd %%mm7, %%mm5\n\t" + "psllq $8, %%mm1\n\t" + "psllq $16, %%mm2\n\t" + "por %%mm1, %%mm0\n\t" + "por %%mm2, %%mm0\n\t" + "psllq $8, %%mm4\n\t" + "psllq $16, %%mm5\n\t" + "por %%mm4, %%mm3\n\t" + "por %%mm5, %%mm3\n\t" + MOVNTQ" 
%%mm0, %0\n\t" + MOVNTQ" %%mm3, 8%0\n\t" + :"=m"(*d) + :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) + :"memory"); + d += 16; + s += 4; + } + __asm __volatile(SFENCE:::"memory"); + __asm __volatile(EMMS:::"memory"); +#endif + while(s < end) + { + register uint16_t bgr; + bgr = *s++; + *d++ = (bgr&0x1F)<<3; + *d++ = (bgr&0x7E0)>>3; + *d++ = (bgr&0xF800)>>8; + *d++ = 0; + } } static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned int src_size) { #ifdef HAVE_MMX +/* TODO: unroll this loop */ asm volatile ( "xorl %%eax, %%eax \n\t" ".balign 16 \n\t" @@ -554,9 +955,9 @@ "movq %%mm0, %%mm2 \n\t" "pslld $16, %%mm0 \n\t" "psrld $16, %%mm1 \n\t" - "pand "MANGLE(mask32r)", %%mm0 \n\t" - "pand "MANGLE(mask32g)", %%mm2 \n\t" - "pand "MANGLE(mask32b)", %%mm1 \n\t" + "pand "MANGLE(mask32r)", %%mm0 \n\t" + "pand "MANGLE(mask32g)", %%mm2 \n\t" + "pand "MANGLE(mask32b)", %%mm1 \n\t" "por %%mm0, %%mm2 \n\t" "por %%mm1, %%mm2 \n\t" MOVNTQ" %%mm2, (%1, %%eax) \n\t" @@ -570,8 +971,8 @@ __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory"); #else - int i; - int num_pixels= src_size >> 2; + unsigned i; + unsigned num_pixels = src_size >> 2; for(i=0; i<num_pixels; i++) { dst[4*i + 0] = src[4*i + 2]; @@ -583,7 +984,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size) { - int i; + unsigned i; #ifdef HAVE_MMX int mmx_size= 23 - src_size; asm volatile ( @@ -631,15 +1032,16 @@ __asm __volatile(EMMS:::"memory"); if(mmx_size==23) return; //finihsed, was multiple of 8 + src+= src_size; dst+= src_size; - src_size= 23 - mmx_size; + src_size= 23-mmx_size; src-= src_size; dst-= src_size; #endif for(i=0; i<src_size; i+=3) { - register int x; + register uint8_t x; x = src[i + 2]; dst[i + 1] = src[i + 1]; dst[i + 2] = src[i + 0]; @@ -651,8 +1053,8 @@ unsigned int width, unsigned int height, unsigned int lumStride, unsigned int chromStride, unsigned int dstStride, int vertLumPerChroma) { - int y; - const int chromWidth= width>>1; + unsigned y; + const unsigned chromWidth= width>>1; for(y=0; y<height; y++) { #ifdef HAVE_MMX @@ -691,14 +1093,33 @@ : "%eax" ); #else +#if __WORDSIZE >= 64 int i; - for(i=0; i<chromWidth; i++) - { - dst[4*i+0] = ysrc[2*i+0]; - dst[4*i+1] = usrc[i]; - dst[4*i+2] = ysrc[2*i+1]; - dst[4*i+3] = vsrc[i]; + uint64_t *ldst = (uint64_t *) dst; + const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; + for(i = 0; i < chromWidth; i += 2){ + uint64_t k, l; + k = yc[0] + (uc[0] << 8) + + (yc[1] << 16) + (vc[0] << 24); + l = yc[2] + (uc[1] << 8) + + (yc[3] << 16) + (vc[1] << 24); + *ldst++ = k + (l << 32); + yc += 4; + uc += 2; + vc += 2; } + +#else + int i, *idst = (int32_t *) dst; + const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; + for(i = 0; i < chromWidth; i++){ + *idst++ = yc[0] + (uc[0] << 8) + + (yc[1] << 16) + (vc[0] << 24); + yc += 2; + uc++; + vc++; + } +#endif #endif if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) { @@ -748,8 +1169,8 @@ unsigned int width, unsigned int height, unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) { - int y; - const int chromWidth= width>>1; + unsigned y; + const unsigned chromWidth= width>>1; for(y=0; y<height; y+=2) { #ifdef HAVE_MMX @@ -835,7 +1256,7 @@ : "memory", "%eax" ); #else - int i; + unsigned i; for(i=0; i<chromWidth; i++) { ydst[2*i+0] = src[4*i+0]; @@ -884,8 +1305,8 @@ unsigned int width, unsigned int height, unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) { - int y; - const int chromWidth= width>>1; + unsigned y; + 
const unsigned chromWidth= width>>1; for(y=0; y<height; y+=2) { #ifdef HAVE_MMX @@ -971,7 +1392,7 @@ : "memory", "%eax" ); #else - int i; + unsigned i; for(i=0; i<chromWidth; i++) { udst[i] = src[4*i+0]; @@ -1010,12 +1431,12 @@ unsigned int width, unsigned int height, unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) { - int y; - const int chromWidth= width>>1; + unsigned y; + const unsigned chromWidth= width>>1; #ifdef HAVE_MMX for(y=0; y<height-2; y+=2) { - int i; + unsigned i; for(i=0; i<2; i++) { asm volatile( @@ -1254,7 +1675,7 @@ #endif for(; y<height; y+=2) { - int i; + unsigned i; for(i=0; i<chromWidth; i++) { unsigned int b= src[6*i+0]; @@ -1304,12 +1725,13 @@ } void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, - int width, int height, int src1Stride, int src2Stride, int dstStride){ - int h; + unsigned width, unsigned height, unsigned src1Stride, + unsigned src2Stride, unsigned dstStride){ + unsigned h; for(h=0; h < height; h++) { - int w; + unsigned w; #ifdef HAVE_MMX #ifdef HAVE_SSE2
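The comment block inside rgb15to24 above contrasts the shift-and-zero-fill expansion the code actually performs with the more accurate "left bit replication". A self-contained sketch of both, for a single 5-bit channel value (0..31); only the replicated form can reach full white (31 -> 255), which is why plain shifting darkens the image slightly:

```c
#include <stdint.h>

static uint8_t expand5_shift(uint8_t v)      /* what the converters above do  */
{
	return (uint8_t)(v << 3);                /* 31 -> 248: never all-ones     */
}

static uint8_t expand5_replicate(uint8_t v)  /* the "better method" described */
{
	return (uint8_t)((v << 3) | (v >> 2));   /* top 3 bits refill the low end: 31 -> 255 */
}
```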
--- a/postproc/swscale.c Sat Jun 22 08:47:56 2002 +0000 +++ b/postproc/swscale.c Sat Jun 22 08:49:45 2002 +0000 @@ -65,6 +65,14 @@ #include "rgb2rgb.h" #include "../libvo/fastmemcpy.h" #include "../mp_msg.h" + +#define MSG_WARN(args...) mp_msg(MSGT_SWS,MSGL_WARN, ##args ) +#define MSG_FATAL(args...) mp_msg(MSGT_SWS,MSGL_FATAL, ##args ) +#define MSG_ERR(args...) mp_msg(MSGT_SWS,MSGL_ERR, ##args ) +#define MSG_V(args...) mp_msg(MSGT_SWS,MSGL_V, ##args ) +#define MSG_DBG2(args...) mp_msg(MSGT_SWS,MSGL_DBG2, ##args ) +#define MSG_INFO(args...) mp_msg(MSGT_SWS,MSGL_INFO, ##args ) + #undef MOVNTQ #undef PAVGB @@ -92,19 +100,26 @@ #endif //FIXME replace this with something faster -#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420) -#define isYUV(x) ((x)==IMGFMT_YUY2 || isPlanarYUV(x)) -#define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420) +#define isBGR(x) ((x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15) +#define isRGB(x) ((x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24|| (x)==IMGFMT_RGB16|| (x)==IMGFMT_RGB15) +#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV|| (x)==IMGFMT_YVU9 || (x)==IMGFMT_IF09) +#define isYUV(x) (!(isBGR(x) || isRGB(x))) +#define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) #define isHalfChrH(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420) -#define isPacked(x) ((x)==IMGFMT_YUY2 || ((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR || ((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB) -#define isGray(x) ((x)==IMGFMT_Y800) +#define isPacked(x) (isYUV(x) && !isPlanarYUV(x)) +#define isGray(x) ((x)==IMGFMT_Y800) /* Behaviour the same as PACKED but it's PLANAR */ #define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 \ || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\ || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\ || (x)==IMGFMT_Y800) #define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 \ || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15) -#define isBGR(x) ((x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15) +#define isSupportedUnscaledIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_NV12 \ + || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\ + || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\ + || (x)==IMGFMT_Y800) +#define isSupportedUnscaledOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x) == IMGFMT_YUY2 \ + || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15) #define RGB2YUV_SHIFT 16 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) @@ -751,7 +766,6 @@ if (flags&SWS_BICUBIC) filterSize= 4; else if(flags&SWS_X ) filterSize= 4; else filterSize= 2; // SWS_BILINEAR / SWS_AREA -// printf("%d %d %d\n", filterSize, srcW, dstW); filter= (double*)memalign(8, dstW*sizeof(double)*filterSize); xDstInSrc= xInc/2 - 0x8000; @@ -780,12 +794,10 @@ y4 = ( -1.0*d + 1.0*d*d*d)/6.0; } -// printf("%d %d %d \n", coeff, (int)d, xDstInSrc); filter[i*filterSize + 0]= y1; filter[i*filterSize + 1]= y2; filter[i*filterSize + 2]= y3; filter[i*filterSize + 3]= y4; -// printf("%1.3f %1.3f %1.3f %1.3f %1.3f\n",d , y1, y2, y3, y4); } else { @@ -795,7 +807,6 @@ double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16); double coeff= 1.0 - d; if(coeff<0) coeff=0; - // printf("%d %d %d \n", coeff, (int)d, xDstInSrc); filter[i*filterSize + j]= coeff; xx++; } @@ -812,7 +823,6 @@ else if(flags&SWS_X) filterSize= (int)ceil(1 + 4.0*srcW / 
(double)dstW); else if(flags&SWS_AREA) filterSize= (int)ceil(1 + 1.0*srcW / (double)dstW); else /* BILINEAR */ filterSize= (int)ceil(1 + 2.0*srcW / (double)dstW); -// printf("%d %d %d\n", *filterSize, srcW, dstW); filter= (double*)memalign(8, dstW*sizeof(double)*filterSize); xDstInSrc= xInc/2 - 0x8000; @@ -849,7 +859,6 @@ coeff= 1.0 - d; if(coeff<0) coeff=0; } -// printf("%1.3f %2.3f %d \n", coeff, d, xDstInSrc); filter[i*filterSize + j]= coeff; xx++; } @@ -940,7 +949,7 @@ *outFilterSize= filterSize; if(flags&SWS_PRINT_INFO) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize); + MSG_INFO("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize); /* try to reduce the filter-size (step2 reduce it) */ for(i=0; i<dstW; i++) { @@ -1254,6 +1263,32 @@ #endif //!RUNTIME_CPUDETECT } +static void PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]){ + uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; + /* Copy Y plane */ + if(dstStride[0]==srcStride[0]) + memcpy(dst, src[0], srcSliceH*dstStride[0]); + else + { + int i; + uint8_t *srcPtr= src[0]; + uint8_t *dstPtr= dst; + for(i=0; i<srcSliceH; i++) + { + memcpy(dstPtr, srcPtr, srcStride[0]); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + } + dst = dstParam[1] + dstStride[1]*srcSliceY; + if(c->srcFormat==IMGFMT_YV12) + interleaveBytes( src[1],src[2],dst,c->srcW,srcSliceH,srcStride[1],srcStride[2],dstStride[0] ); + else /* I420 & IYUV */ + interleaveBytes( src[2],src[1],dst,c->srcW,srcSliceH,srcStride[2],srcStride[1],dstStride[0] ); +} + + /* Warper functions for yuv2bgr */ static void planarYuvToBgr(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[]){ @@ -1265,6 +1300,16 @@ yuv2rgb( dst,src[0],src[2],src[1],c->srcW,srcSliceH,dstStride[0],srcStride[0],srcStride[1] ); } +static void Planar2PackedWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]){ + uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; + + if(c->srcFormat==IMGFMT_YV12) + yv12toyuy2( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] ); + else /* I420 & IYUV */ + yv12toyuy2( src[0],src[2],src[1],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] ); +} + static void bgr24to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ @@ -1285,6 +1330,46 @@ } } +static void bgr24to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]){ + + if(dstStride[0]*3==srcStride[0]*2) + rgb24to16(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); + else + { + int i; + uint8_t *srcPtr= src[0]; + uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; + + for(i=0; i<srcSliceH; i++) + { + rgb24to16(srcPtr, dstPtr, c->srcW*3); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + } +} + +static void bgr24to15Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]){ + + if(dstStride[0]*3==srcStride[0]*2) + rgb24to15(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); + else + { + int i; + uint8_t *srcPtr= src[0]; + uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; + + for(i=0; i<srcSliceH; i++) + { + rgb24to15(srcPtr, dstPtr, c->srcW*3); + srcPtr+= 
srcStride[0]; + dstPtr+= dstStride[0]; + } + } +} + static void bgr32to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ @@ -1305,6 +1390,46 @@ } } +static void bgr32to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]){ + + if(dstStride[0]*4==srcStride[0]*2) + rgb32to16(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); + else + { + int i; + uint8_t *srcPtr= src[0]; + uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; + + for(i=0; i<srcSliceH; i++) + { + rgb32to16(srcPtr, dstPtr, c->srcW<<2); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + } +} + +static void bgr32to15Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]){ + + if(dstStride[0]*4==srcStride[0]*2) + rgb32to15(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); + else + { + int i; + uint8_t *srcPtr= src[0]; + uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; + + for(i=0; i<srcSliceH; i++) + { + rgb32to15(srcPtr, dstPtr, c->srcW<<2); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + } +} + static void bgr15to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ @@ -1325,6 +1450,86 @@ } } +static void bgr15to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]){ + + if(dstStride[0]*2==srcStride[0]*3) + rgb15to24(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); + else + { + int i; + uint8_t *srcPtr= src[0]; + uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; + + for(i=0; i<srcSliceH; i++) + { + rgb15to24(srcPtr, dstPtr, c->srcW<<1); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + } +} + +static void bgr15to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]){ + + if(dstStride[0]*2==srcStride[0]*4) + rgb15to32(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); + else + { + int i; + uint8_t *srcPtr= src[0]; + uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; + + for(i=0; i<srcSliceH; i++) + { + rgb15to32(srcPtr, dstPtr, c->srcW<<1); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + } +} + +static void bgr16to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]){ + + if(dstStride[0]*2==srcStride[0]*3) + rgb16to24(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); + else + { + int i; + uint8_t *srcPtr= src[0]; + uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; + + for(i=0; i<srcSliceH; i++) + { + rgb16to24(srcPtr, dstPtr, c->srcW<<1); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + } +} + +static void bgr16to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]){ + + if(dstStride[0]*2==srcStride[0]*4) + rgb16to32(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); + else + { + int i; + uint8_t *srcPtr= src[0]; + uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; + + for(i=0; i<srcSliceH; i++) + { + rgb16to32(srcPtr, dstPtr, c->srcW<<1); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + } +} + static void bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ @@ -1346,21 +1551,25 @@ 
uint8_t *src[3]; uint8_t *dst[3]; - if(c->srcFormat == IMGFMT_I420){ + if(isPlanarYUV(c->srcFormat)) + { + if(c->srcFormat == IMGFMT_I420 || c->srcFormat == IMGFMT_IYUV){ src[0]= srcParam[0]; src[1]= srcParam[2]; src[2]= srcParam[1]; srcStride[0]= srcStrideParam[0]; srcStride[1]= srcStrideParam[2]; srcStride[2]= srcStrideParam[1]; - } - else if(c->srcFormat==IMGFMT_YV12){ + } + else + { src[0]= srcParam[0]; src[1]= srcParam[1]; src[2]= srcParam[2]; srcStride[0]= srcStrideParam[0]; srcStride[1]= srcStrideParam[1]; srcStride[2]= srcStrideParam[2]; + } } else if(isPacked(c->srcFormat) || isGray(c->srcFormat)){ src[0]= srcParam[0]; @@ -1371,7 +1580,7 @@ srcStride[2]= 0; } - if(c->dstFormat == IMGFMT_I420){ + if(c->dstFormat == IMGFMT_I420 || c->dstFormat == IMGFMT_IYUV){ dst[0]= dstParam[0]; dst[1]= dstParam[2]; dst[2]= dstParam[1]; @@ -1411,9 +1620,21 @@ int plane; for(plane=0; plane<3; plane++) { - int length= plane==0 ? c->srcW : ((c->srcW+1)>>1); - int y= plane==0 ? srcSliceY: ((srcSliceY+1)>>1); - int height= plane==0 ? srcSliceH: ((srcSliceH+1)>>1); + int length; + int y; + int height; + if(c->srcFormat == IMGFMT_YVU9 || c->srcFormat == IMGFMT_IF09) + { + length= plane==0 ? c->srcW : ((c->srcW+1)>>2); + y= plane==0 ? srcSliceY: ((srcSliceY+1)>>2); + height= plane==0 ? srcSliceH: ((srcSliceH+1)>>2); + } + else + { + length= plane==0 ? c->srcW : ((c->srcW+1)>>1); + y= plane==0 ? srcSliceY: ((srcSliceY+1)>>1); + height= plane==0 ? srcSliceH: ((srcSliceH+1)>>1); + } if(dstStride[plane]==srcStride[plane]) memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]); @@ -1433,12 +1654,23 @@ } } +static uint32_t remove_dup_fourcc(uint32_t fourcc) +{ + switch(fourcc) + { + case IMGFMT_IYUV: return IMGFMT_I420; + case IMGFMT_Y8 : return IMGFMT_Y800; + default: return fourcc; + } +} + SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter){ SwsContext *c; int i; int usesFilter; + int simple_copy, unscaled_copy; SwsFilter dummyFilter= {NULL, NULL, NULL, NULL}; #ifdef ARCH_X86 @@ -1449,25 +1681,44 @@ if(swScale==NULL) globalInit(); /* avoid dupplicate Formats, so we dont need to check to much */ - if(srcFormat==IMGFMT_IYUV) srcFormat=IMGFMT_I420; - if(srcFormat==IMGFMT_Y8) srcFormat=IMGFMT_Y800; - if(dstFormat==IMGFMT_Y8) dstFormat=IMGFMT_Y800; - - if(!isSupportedIn(srcFormat)) + srcFormat = remove_dup_fourcc(srcFormat); + dstFormat = remove_dup_fourcc(dstFormat); + /* don't refuse this beauty */ + unscaled_copy = (srcW == dstW && srcH == dstH); + simple_copy = (srcW == dstW && srcH == dstH && srcFormat == dstFormat); + if(!simple_copy) { - mp_msg(MSGT_SWS,MSGL_ERR,"swScaler: %s is not supported as input format\n", vo_format_name(srcFormat)); - return NULL; + if(unscaled_copy) + { + if(!isSupportedUnscaledIn(srcFormat)) + { + MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat)); + return NULL; + } + if(!isSupportedUnscaledOut(dstFormat)) + { + MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat)); + return NULL; + } + } + else + { + if(!isSupportedIn(srcFormat)) + { + MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat)); + return NULL; + } + if(!isSupportedOut(dstFormat)) + { + MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat)); + return NULL; + } + } } - if(!isSupportedOut(dstFormat)) - { - mp_msg(MSGT_SWS,MSGL_ERR,"swScaler: %s is not supported as output 
format\n", vo_format_name(dstFormat)); - return NULL; - } - /* sanity check */ if(srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code { - mp_msg(MSGT_SWS,MSGL_ERR,"swScaler: %dx%d -> %dx%d is invalid scaling dimension\n", + MSG_ERR("swScaler: %dx%d -> %dx%d is invalid scaling dimension\n", srcW, srcH, dstW, dstH); return NULL; } @@ -1501,6 +1752,26 @@ /* unscaled special Cases */ if(srcW==dstW && srcH==dstH && !usesFilter) { + /* yv12_to_nv12 */ + if((srcFormat == IMGFMT_YV12||srcFormat==IMGFMT_I420)&&dstFormat == IMGFMT_NV12) + { + c->swScale= PlanarToNV12Wrapper; + + if(flags&SWS_PRINT_INFO) + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", + vo_format_name(srcFormat), vo_format_name(dstFormat)); + return c; + } + /* yv12_to_yuy2 */ + if((srcFormat == IMGFMT_YV12||srcFormat==IMGFMT_I420)&&dstFormat == IMGFMT_YUY2) + { + c->swScale= Planar2PackedWrapper; + + if(flags&SWS_PRINT_INFO) + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", + vo_format_name(srcFormat), vo_format_name(dstFormat)); + return c; + } /* yuv2bgr */ if(isPlanarYUV(srcFormat) && isBGR(dstFormat)) { @@ -1516,7 +1787,7 @@ c->swScale= planarYuvToBgr; if(flags&SWS_PRINT_INFO) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", vo_format_name(srcFormat), vo_format_name(dstFormat)); return c; } @@ -1527,7 +1798,7 @@ c->swScale= simpleCopy; if(flags&SWS_PRINT_INFO) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", vo_format_name(srcFormat), vo_format_name(dstFormat)); return c; } @@ -1539,7 +1810,31 @@ c->swScale= bgr32to24Wrapper; if(flags&SWS_PRINT_INFO) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", + vo_format_name(srcFormat), vo_format_name(dstFormat)); + return c; + } + + /* bgr32to16 & rgb32to16*/ + if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR16) + ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB16)) + { + c->swScale= bgr32to16Wrapper; + + if(flags&SWS_PRINT_INFO) + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", + vo_format_name(srcFormat), vo_format_name(dstFormat)); + return c; + } + + /* bgr32to15 & rgb32to15*/ + if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR15) + ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB15)) + { + c->swScale= bgr32to15Wrapper; + + if(flags&SWS_PRINT_INFO) + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", vo_format_name(srcFormat), vo_format_name(dstFormat)); return c; } @@ -1551,7 +1846,31 @@ c->swScale= bgr24to32Wrapper; if(flags&SWS_PRINT_INFO) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", + vo_format_name(srcFormat), vo_format_name(dstFormat)); + return c; + } + + /* bgr24to16 & rgb24to16*/ + if((srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_BGR16) + ||(srcFormat==IMGFMT_RGB24 && dstFormat==IMGFMT_RGB16)) + { + c->swScale= bgr24to16Wrapper; + + if(flags&SWS_PRINT_INFO) + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", + vo_format_name(srcFormat), vo_format_name(dstFormat)); + return c; + } + + /* bgr24to15 & rgb24to15*/ + if((srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_BGR15) + 
||(srcFormat==IMGFMT_RGB24 && dstFormat==IMGFMT_RGB15)) + { + c->swScale= bgr24to15Wrapper; + + if(flags&SWS_PRINT_INFO) + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", vo_format_name(srcFormat), vo_format_name(dstFormat)); return c; } @@ -1562,7 +1881,55 @@ c->swScale= bgr15to16Wrapper; if(flags&SWS_PRINT_INFO) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", + vo_format_name(srcFormat), vo_format_name(dstFormat)); + return c; + } + + /* bgr15to24 */ + if((srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR24) + ||(srcFormat==IMGFMT_RGB15 && dstFormat==IMGFMT_RGB24)) + { + c->swScale= bgr15to24Wrapper; + + if(flags&SWS_PRINT_INFO) + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", + vo_format_name(srcFormat), vo_format_name(dstFormat)); + return c; + } + + /* bgr15to32 */ + if((srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR32) + ||(srcFormat==IMGFMT_RGB15 && dstFormat==IMGFMT_RGB32)) + { + c->swScale= bgr15to32Wrapper; + + if(flags&SWS_PRINT_INFO) + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", + vo_format_name(srcFormat), vo_format_name(dstFormat)); + return c; + } + + /* bgr16to24 */ + if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR24) + ||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB24)) + { + c->swScale= bgr16to24Wrapper; + + if(flags&SWS_PRINT_INFO) + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", + vo_format_name(srcFormat), vo_format_name(dstFormat)); + return c; + } + + /* bgr16to32 */ + if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR32) + ||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB32)) + { + c->swScale= bgr16to32Wrapper; + + if(flags&SWS_PRINT_INFO) + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", vo_format_name(srcFormat), vo_format_name(dstFormat)); return c; } @@ -1573,7 +1940,7 @@ c->swScale= bgr24toyv12Wrapper; if(flags&SWS_PRINT_INFO) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", vo_format_name(srcFormat), vo_format_name(dstFormat)); return c; } @@ -1585,7 +1952,7 @@ if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { if(flags&SWS_PRINT_INFO) - mp_msg(MSGT_SWS,MSGL_WARN,"SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n"); + MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n"); } } else @@ -1723,33 +2090,35 @@ char *dither= ""; #endif if(flags&SWS_FAST_BILINEAR) - mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: FAST_BILINEAR scaler, "); + MSG_INFO("\nSwScaler: FAST_BILINEAR scaler, "); else if(flags&SWS_BILINEAR) - mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: BILINEAR scaler, "); + MSG_INFO("\nSwScaler: BILINEAR scaler, "); else if(flags&SWS_BICUBIC) - mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: BICUBIC scaler, "); + MSG_INFO("\nSwScaler: BICUBIC scaler, "); else if(flags&SWS_X) - mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: Experimental scaler, "); + MSG_INFO("\nSwScaler: Experimental scaler, "); else if(flags&SWS_POINT) - mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: Nearest Neighbor / POINT scaler, "); + MSG_INFO("\nSwScaler: Nearest Neighbor / POINT scaler, "); else if(flags&SWS_AREA) - mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: Area Averageing scaler, "); + MSG_INFO("\nSwScaler: Area Averageing scaler, "); else - mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: ehh flags invalid?! "); + MSG_INFO("\nSwScaler: ehh flags invalid?! 
"); - mp_msg(MSGT_SWS,MSGL_INFO,"%dx%d %s -> %dx%d%s %s ", - srcW,srcH, vo_format_name(srcFormat), dstW,dstH, - (dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16) ? - dither : "", vo_format_name(dstFormat)); + if(dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16) + MSG_INFO("from %s to%s %s ", + vo_format_name(srcFormat), dither, vo_format_name(dstFormat)); + else + MSG_INFO("from %s to %s ", + vo_format_name(srcFormat), vo_format_name(dstFormat)); if(cpuCaps.hasMMX2) - mp_msg(MSGT_SWS,MSGL_INFO,"using MMX2\n"); + MSG_INFO("using MMX2\n"); else if(cpuCaps.has3DNow) - mp_msg(MSGT_SWS,MSGL_INFO,"using 3DNOW\n"); + MSG_INFO("using 3DNOW\n"); else if(cpuCaps.hasMMX) - mp_msg(MSGT_SWS,MSGL_INFO,"using MMX\n"); + MSG_INFO("using MMX\n"); else - mp_msg(MSGT_SWS,MSGL_INFO,"using C\n"); + MSG_INFO("using C\n"); } if((flags & SWS_PRINT_INFO) && verbose) @@ -1757,70 +2126,70 @@ if(cpuCaps.hasMMX) { if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR)) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n"); + MSG_V("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n"); else { if(c->hLumFilterSize==4) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n"); + MSG_V("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n"); else if(c->hLumFilterSize==8) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n"); + MSG_V("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n"); else - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n"); + MSG_V("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n"); if(c->hChrFilterSize==4) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n"); + MSG_V("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n"); else if(c->hChrFilterSize==8) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n"); + MSG_V("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n"); else - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n"); + MSG_V("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n"); } } else { #ifdef ARCH_X86 - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using X86-Asm scaler for horizontal scaling\n"); + MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n"); #else if(flags & SWS_FAST_BILINEAR) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n"); + MSG_V("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n"); else - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using C scaler for horizontal scaling\n"); + MSG_V("SwScaler: using C scaler for horizontal scaling\n"); #endif } if(isPlanarYUV(dstFormat)) { if(c->vLumFilterSize==1) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C"); + MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C"); else - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C"); + MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? 
"MMX" : "C"); } else { if(c->vLumFilterSize==1 && c->vChrFilterSize==2) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n" + MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n" "SwScaler: 2-tap scaler for vertical chrominance scaling (BGR)\n",cpuCaps.hasMMX ? "MMX" : "C"); else if(c->vLumFilterSize==2 && c->vChrFilterSize==2) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); + MSG_V("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); else - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); + MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); } if(dstFormat==IMGFMT_BGR24) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using %s YV12->BGR24 Converter\n", + MSG_V("SwScaler: using %s YV12->BGR24 Converter\n", cpuCaps.hasMMX2 ? "MMX2" : (cpuCaps.hasMMX ? "MMX" : "C")); else if(dstFormat==IMGFMT_BGR32) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using %s YV12->BGR32 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); + MSG_V("SwScaler: using %s YV12->BGR32 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); else if(dstFormat==IMGFMT_BGR16) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using %s YV12->BGR16 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); + MSG_V("SwScaler: using %s YV12->BGR16 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); else if(dstFormat==IMGFMT_BGR15) - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using %s YV12->BGR15 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); + MSG_V("SwScaler: using %s YV12->BGR15 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); - mp_msg(MSGT_SWS,MSGL_V,"SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); + MSG_V("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); } if((flags & SWS_PRINT_INFO) && verbose>1) { - mp_msg(MSGT_SWS,MSGL_DBG2,"SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", + MSG_DBG2("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc); - mp_msg(MSGT_SWS,MSGL_DBG2,"SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", + MSG_DBG2("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc); } @@ -2039,9 +2408,9 @@ for(i=0; i<a->length; i++) { int x= (int)((a->coeff[i]-min)*60.0/range +0.5); - printf("%1.3f ", a->coeff[i]); - for(;x>0; x--) printf(" "); - printf("|\n"); + MSG_DBG2("%1.3f ", a->coeff[i]); + for(;x>0; x--) MSG_DBG2(" "); + MSG_DBG2("|\n"); } }
--- a/postproc/swscale_template.c	Sat Jun 22 08:47:56 2002 +0000
+++ b/postproc/swscale_template.c	Sat Jun 22 08:49:45 2002 +0000
@@ -2626,7 +2626,7 @@
 		srcStride[1]= srcStrideParam[1];
 		srcStride[2]= srcStrideParam[2];
 	}
-	else if(isPacked(c->srcFormat)){
+	else if(isPacked(c->srcFormat) || isBGR(c->srcFormat) || isRGB(c->srcFormat)){
 		src[0]= src[1]= src[2]= srcParam[0];
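This change routes packed RGB and BGR sources through the same single-buffer setup as the other packed formats: all three src pointers alias the one interleaved plane. The isPacked/isBGR/isRGB predicates are defined elsewhere in the scaler, not in this diff; a rough sketch of how the RGB/BGR checks can be written against the IMGFMT fourcc layout (IMGFMT_RGB_MASK and friends are assumed here) might be:

    /* Hypothetical sketch of the format predicates used above. */
    #define isBGR(x) (((x) & IMGFMT_BGR_MASK) == IMGFMT_BGR)
    #define isRGB(x) (((x) & IMGFMT_RGB_MASK) == IMGFMT_RGB)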
--- a/postproc/yuv2rgb.c	Sat Jun 22 08:47:56 2002 +0000
+++ b/postproc/yuv2rgb.c	Sat Jun 22 08:49:45 2002 +0000
@@ -156,7 +156,7 @@
     {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */
 };
 
-static void yuv2rgb_c_init (int bpp, int mode);
+static void yuv2rgb_c_init (unsigned bpp, int mode);
 
 yuv2rgb_fun yuv2rgb;
 
@@ -166,11 +166,11 @@
 
 static void yuv2rgb_c (void * dst, uint8_t * py, uint8_t * pu, uint8_t * pv,
-		       int h_size, int v_size,
-		       int rgb_stride, int y_stride, int uv_stride)
+		       unsigned h_size, unsigned v_size,
+		       unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
 {
     v_size >>= 1;
-    
+
     while (v_size--) {
 	yuv2rgb_c_internal (py, py + y_stride, pu, pv, dst, dst + rgb_stride, h_size, v_size<<1);
@@ -182,7 +182,7 @@
     }
 }
 
-void yuv2rgb_init (int bpp, int mode)
+void yuv2rgb_init (unsigned bpp, int mode)
 {
     yuv2rgb = NULL;
 #ifdef CAN_COMPILE_X86_ASM
@@ -676,7 +676,7 @@
     return -((-dividend + (divisor>>1)) / divisor);
 }
 
-static void yuv2rgb_c_init (int bpp, int mode)
+static void yuv2rgb_c_init (unsigned bpp, int mode)
 {
     int i;
     uint8_t table_Y[1024];
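The int-to-unsigned conversion has to stay consistent with the yuv2rgb_fun pointer type through which all of these converters are dispatched (the global yuv2rgb above, plus the per-backend init functions below). The typedef itself is in a header not shown in this changeset; matching the new signatures, it would presumably read:

    /* Presumed shape of the function-pointer type after this change;
     * the real typedef is in a header outside this diff. */
    #include <inttypes.h>

    typedef void (*yuv2rgb_fun) (void *dst, uint8_t *py, uint8_t *pu, uint8_t *pv,
                                 unsigned h_size, unsigned v_size,
                                 unsigned rgb_stride, unsigned y_stride,
                                 unsigned uv_stride);

Sizes and strides are never negative, so unsigned is the natural type and avoids sign-extension surprises when the values feed pointer arithmetic in the asm paths.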
--- a/postproc/yuv2rgb_mlib.c	Sat Jun 22 08:47:56 2002 +0000
+++ b/postproc/yuv2rgb_mlib.c	Sat Jun 22 08:49:45 2002 +0000
@@ -29,8 +29,8 @@
 
 static void mlib_YUV2ARGB420_32(uint8_t* image, uint8_t* py,
 				uint8_t* pu, uint8_t* pv,
-				int h_size, int v_size,
-				int rgb_stride, int y_stride, int uv_stride)
+				unsigned h_size, unsigned v_size,
+				unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
 {
   mlib_VideoColorYUV2ARGB420(image, py, pu, pv, h_size,
 			     v_size, rgb_stride, y_stride, uv_stride);
@@ -38,8 +38,8 @@
 
 static void mlib_YUV2ABGR420_32(uint8_t* image, uint8_t* py,
 				uint8_t* pu, uint8_t* pv,
-				int h_size, int v_size,
-				int rgb_stride, int y_stride, int uv_stride)
+				unsigned h_size, unsigned v_size,
+				unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
 {
   mlib_VideoColorYUV2ABGR420(image, py, pu, pv, h_size,
 			     v_size, rgb_stride, y_stride, uv_stride);
@@ -47,15 +47,15 @@
 
 static void mlib_YUV2RGB420_24(uint8_t* image, uint8_t* py,
 			       uint8_t* pu, uint8_t* pv,
-			       int h_size, int v_size,
-			       int rgb_stride, int y_stride, int uv_stride)
+			       unsigned h_size, unsigned v_size,
+			       unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
 {
   mlib_VideoColorYUV2RGB420(image, py, pu, pv, h_size,
 			    v_size, rgb_stride, y_stride, uv_stride);
 }
 
-yuv2rgb_fun yuv2rgb_init_mlib(int bpp, int mode)
+yuv2rgb_fun yuv2rgb_init_mlib(unsigned bpp, int mode)
 {
   if( bpp == 24 )
--- a/postproc/yuv2rgb_template.c	Sat Jun 22 08:47:56 2002 +0000
+++ b/postproc/yuv2rgb_template.c	Sat Jun 22 08:49:45 2002 +0000
@@ -123,8 +123,8 @@
 
 static inline void RENAME(yuv420_rgb16) (uint8_t * image, uint8_t * py,
 					 uint8_t * pu, uint8_t * pv,
-					 int h_size, int v_size,
-					 int rgb_stride, int y_stride, int uv_stride)
+					 unsigned h_size, unsigned v_size,
+					 unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
 {
     int even = 1;
     int x, y;
@@ -228,8 +228,8 @@
 
 static inline void RENAME(yuv420_rgb15) (uint8_t * image, uint8_t * py,
 					 uint8_t * pu, uint8_t * pv,
-					 int h_size, int v_size,
-					 int rgb_stride, int y_stride, int uv_stride)
+					 unsigned h_size, unsigned v_size,
+					 unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
 {
     int even = 1;
     int x, y;
@@ -329,8 +329,8 @@
 
 static inline void RENAME(yuv420_rgb24) (uint8_t * image, uint8_t * py,
 					 uint8_t * pu, uint8_t * pv,
-					 int h_size, int v_size,
-					 int rgb_stride, int y_stride, int uv_stride)
+					 unsigned h_size, unsigned v_size,
+					 unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
 {
     int even = 1;
     int x, y;
@@ -488,8 +488,8 @@
 
 static inline void RENAME(yuv420_argb32) (uint8_t * image, uint8_t * py,
 					  uint8_t * pu, uint8_t * pv,
-					  int h_size, int v_size,
-					  int rgb_stride, int y_stride, int uv_stride)
+					  unsigned h_size, unsigned v_size,
+					  unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
 {
     int even = 1;
     int x, y;
@@ -584,7 +584,7 @@
     __asm__ __volatile__ (EMMS);
 }
 
-yuv2rgb_fun RENAME(yuv2rgb_init) (int bpp, int mode)
+yuv2rgb_fun RENAME(yuv2rgb_init) (unsigned bpp, int mode)
 {
     if (bpp == 15 && mode == MODE_RGB) return RENAME(yuv420_rgb15);
     if (bpp == 16 && mode == MODE_RGB) return RENAME(yuv420_rgb16);
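Taken together: a caller first picks a converter with yuv2rgb_init() for the desired depth and mode, which fills the global yuv2rgb pointer (trying the asm and mlib variants before the C fallback), and then invokes it per frame. A minimal usage sketch, with hypothetical buffer and size names, assuming an even-sized YV12 frame converted to 32-bit RGB:

    /* Hypothetical usage: dst, y_plane, u_plane, v_plane, width and
     * height are assumed to be set up by the caller. */
    yuv2rgb_init(32, MODE_RGB);
    yuv2rgb(dst, y_plane, u_plane, v_plane,
            width, height,     /* h_size, v_size                 */
            width * 4,         /* rgb_stride: 4 bytes per pixel  */
            width, width / 2); /* y_stride, uv_stride (4:2:0)    */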