# HG changeset patch # User diego # Date 1139667370 0 # Node ID 79081ba52e0041a705ebc245a79cb7115b5a9257 # Parent e1108996497c121a1d589cefa841a15782ff9a41 Move the v{Y,C}CoeffsBank vectors into the SwsContext, filling them in just once when the scaler is initialized, instead of building them and freeing them over and over. This gives massive performance improvements. patch by Alan Curry, pacman*at*TheWorld*dot*com diff -r e1108996497c -r 79081ba52e00 postproc/swscale.c --- a/postproc/swscale.c Sat Feb 11 13:42:54 2006 +0000 +++ b/postproc/swscale.c Sat Feb 11 14:16:10 2006 +0000 @@ -2110,6 +2110,25 @@ c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4, (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, srcFilter->chrV, dstFilter->chrV, c->param); + +#ifdef HAVE_ALTIVEC + c->vYCoeffsBank = memalign (16, sizeof (vector signed short)*c->vLumFilterSize*c->dstH); + c->vCCoeffsBank = memalign (16, sizeof (vector signed short)*c->vChrFilterSize*c->dstH); + + for (i=0;i<c->vLumFilterSize*c->dstH;i++) { + int j; + short *p = (short *)&c->vYCoeffsBank[i]; + for (j=0;j<8;j++) + p[j] = c->vLumFilter[i]; + } + + for (i=0;i<c->vChrFilterSize*c->dstH;i++) { + int j; + short *p = (short *)&c->vCCoeffsBank[i]; + for (j=0;j<8;j++) + p[j] = c->vChrFilter[i]; + } +#endif } // Calculate Buffer Sizes so that they won't run out while handling these damn slices @@ -2644,6 +2663,12 @@ c->hLumFilter = NULL; if(c->hChrFilter) free(c->hChrFilter); c->hChrFilter = NULL; +#ifdef HAVE_ALTIVEC + if(c->vYCoeffsBank) free(c->vYCoeffsBank); + c->vYCoeffsBank = NULL; + if(c->vCCoeffsBank) free(c->vCCoeffsBank); + c->vCCoeffsBank = NULL; +#endif if(c->vLumFilterPos) free(c->vLumFilterPos); c->vLumFilterPos = NULL; diff -r e1108996497c -r 79081ba52e00 postproc/swscale_internal.h --- a/postproc/swscale_internal.h Sat Feb 11 13:42:54 2006 +0000 +++ b/postproc/swscale_internal.h Sat Feb 11 14:16:10 2006 +0000 @@ -154,6 +154,7 @@ vector signed short CGV; vector signed short OY; vector unsigned short CSHIFT; + vector 
signed short *vYCoeffsBank, *vCCoeffsBank; #endif diff -r e1108996497c -r 79081ba52e00 postproc/yuv2rgb_altivec.c --- a/postproc/yuv2rgb_altivec.c Sat Feb 11 13:42:54 2006 +0000 +++ b/postproc/yuv2rgb_altivec.c Sat Feb 11 14:16:10 2006 +0000 @@ -774,8 +774,6 @@ uint8_t *dest, int dstW, int dstY) { int i,j; - short tmp __attribute__((aligned (16))); - int16_t *p; short *f; vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; vector signed short R0,G0,B0,R1,G1,B1; @@ -787,29 +785,10 @@ vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0); unsigned long scratch[16] __attribute__ ((aligned (16))); - vector signed short *vYCoeffsBank, *vCCoeffsBank; - vector signed short *YCoeffs, *CCoeffs; - vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH); - vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH); - - for (i=0;i<lumFilterSize*c->dstH;i++) { - tmp = c->vLumFilter[i]; - p = &vYCoeffsBank[i]; - for (j=0;j<8;j++) - p[j] = tmp; - } - - for (i=0;i<chrFilterSize*c->dstH;i++) { - tmp = c->vChrFilter[i]; - p = &vCCoeffsBank[i]; - for (j=0;j<8;j++) - p[j] = tmp; - } - - YCoeffs = vYCoeffsBank+dstY*lumFilterSize; - CCoeffs = vCCoeffsBank+dstY*chrFilterSize; + YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize; + CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize; out = (vector unsigned char *)dest; @@ -962,7 +941,4 @@ memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4); } - if (vYCoeffsBank) free (vYCoeffsBank); - if (vCCoeffsBank) free (vCCoeffsBank); - }