Mercurial > mplayer.hg
changeset 4481:b8ec9cc1b2c5
MMX(2) optimized YUY2 input
avoid duplicate checks for formats by changing them (Y8->Y800, IYUV->I420)
author | michael |
---|---|
date | Sat, 02 Feb 2002 18:18:58 +0000 |
parents | 0919d2ec5e22 |
children | 9c6ab57cfdde |
files | postproc/swscale.c postproc/swscale_template.c |
diffstat | 2 files changed, 71 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/postproc/swscale.c Sat Feb 02 18:09:35 2002 +0000 +++ b/postproc/swscale.c Sat Feb 02 18:18:58 2002 +0000 @@ -17,7 +17,7 @@ */ /* - supported Input formats: YV12, I420, IYUV, YUY2, BGR32, BGR24 (grayscale soon too) + supported Input formats: YV12, I420, IYUV, YUY2, BGR32, BGR24, Y8, Y800 supported output formats: YV12, I420, IYUV, BGR15, BGR16, BGR24, BGR32 (grayscale soon too) BGR15/16 support dithering */ @@ -58,13 +58,19 @@ #endif //FIXME replace this with something faster -#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) +#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420) #define isYUV(x) ((x)==IMGFMT_YUY2 || isPlanarYUV(x)) -#define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) -#define isHalfChrH(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) +#define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420) +#define isHalfChrH(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420) #define isPacked(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24) +#define isGray(x) ((x)==IMGFMT_Y800) +#define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 \ + || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24\ + || (x)==IMGFMT_Y800) +#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 \ + || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15) -#define RGB2YUV_SHIFT 8 +#define RGB2YUV_SHIFT 16 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) #define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5)) #define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) @@ -90,7 +96,8 @@ write special vertical cubic upscale version Optimize C code (yv12 / minmax) add support for packed pixel yuv input & output -add support for Y8 input & output +add support for Y8 output +optimize bgr24 & bgr32 add BGR4 output support write special BGR->BGR scaler */ @@ -118,6 +125,7 @@ static uint64_t __attribute__((aligned(8))) 
bm00001111=0x00000000FFFFFFFFLL; static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL; static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL; +static uint64_t __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL; static volatile uint64_t __attribute__((aligned(8))) b5Dither; static volatile uint64_t __attribute__((aligned(8))) g5Dither; @@ -198,7 +206,7 @@ { volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+ bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+asm_yalpha1+ asm_uvalpha1+ - M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]; + M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101; if(i) i=0; } #endif @@ -1114,11 +1122,15 @@ if(swScale==NULL) globalInit(); + /* avoid dupplicate Formats, so we dont need to check to much */ + if(srcFormat==IMGFMT_IYUV) srcFormat=IMGFMT_I420; + if(srcFormat==IMGFMT_Y8) srcFormat=IMGFMT_Y800; + + if(!isSupportedIn(srcFormat)) return NULL; + if(!isSupportedOut(dstFormat)) return NULL; + /* sanity check */ if(srcW<4 || srcH<1 || dstW<8 || dstH<1) return NULL; //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code - -// if(!isSupportedIn(srcFormat)) return NULL; -// if(!isSupportedOut(dstFormat)) return NULL; if(!dstFilter) dstFilter= &dummyFilter; if(!srcFilter) srcFilter= &dummyFilter;
--- a/postproc/swscale_template.c Sat Feb 02 18:09:35 2002 +0000 +++ b/postproc/swscale_template.c Sat Feb 02 18:18:58 2002 +0000 @@ -1535,9 +1535,26 @@ #endif } +//FIXME yuy2* can read upto 7 samples to much + static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width) { -#ifdef HAVE_MMXFIXME +#ifdef HAVE_MMX + asm volatile( + "movq "MANGLE(bm01010101)", %%mm2\n\t" + "movl %0, %%eax \n\t" + "1: \n\t" + "movq (%1, %%eax,2), %%mm0 \n\t" + "movq 8(%1, %%eax,2), %%mm1 \n\t" + "pand %%mm2, %%mm0 \n\t" + "pand %%mm2, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "movq %%mm0, (%2, %%eax) \n\t" + "addl $8, %%eax \n\t" + " js 1b \n\t" + : : "g" (-width), "r" (src+width*2), "r" (dst+width) + : "%eax" + ); #else int i; for(i=0; i<width; i++) @@ -1547,7 +1564,32 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) { -#ifdef HAVE_MMXFIXME +#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) + asm volatile( + "movq "MANGLE(bm01010101)", %%mm4\n\t" + "movl %0, %%eax \n\t" + "1: \n\t" + "movq (%1, %%eax,4), %%mm0 \n\t" + "movq 8(%1, %%eax,4), %%mm1 \n\t" + "movq (%2, %%eax,4), %%mm2 \n\t" + "movq 8(%2, %%eax,4), %%mm3 \n\t" + PAVGB(%%mm2, %%mm0) + PAVGB(%%mm3, %%mm1) + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "movq %%mm0, %%mm1 \n\t" + "psrlw $8, %%mm0 \n\t" + "pand %%mm4, %%mm1 \n\t" + "packuswb %%mm0, %%mm0 \n\t" + "packuswb %%mm1, %%mm1 \n\t" + "movd %%mm0, (%4, %%eax) \n\t" + "movd %%mm1, (%3, %%eax) \n\t" + "addl $4, %%eax \n\t" + " js 1b \n\t" + : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) + : "%eax" + ); #else int i; for(i=0; i<width; i++) @@ -1954,6 +1996,10 @@ src1= formatConvBuffer; src2= formatConvBuffer+2048; } + else if(isGray(srcFormat)) + { + return; + } #ifdef HAVE_MMX // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one) @@ -2170,7 +2216,7 @@ srcStride[1]= srcStride[2]= 
srcStrideParam[0]<<1; } - else if(c->srcFormat==IMGFMT_Y8){ + else if(isGray(c->srcFormat)){ src[0]= srcParam[0]; src[1]= src[2]= NULL;