# HG changeset patch # User michael # Date 1004473824 0 # Node ID a350d8bed63677828c68f140eb85fe6893d1269c # Parent 4bc54a0f775f3ca796bd49001b742ca8b77b489f bugfixes small speedup for the non x86 stuff diff -r 4bc54a0f775f -r a350d8bed636 postproc/swscale.c --- a/postproc/swscale.c Tue Oct 30 18:45:54 2001 +0000 +++ b/postproc/swscale.c Tue Oct 30 20:30:24 2001 +0000 @@ -33,6 +33,7 @@ TODO more intelligent missalignment avoidance for the horizontal scaler +bicubic scaler */ #define ABS(a) ((a) > 0 ? (a) : (-(a))) @@ -912,7 +913,6 @@ unsigned int xpos=0; // *** horizontal scale Y line to temp buffer #ifdef ARCH_X86 - #ifdef HAVE_MMX2 if(canMMX2BeUsed) { @@ -1012,7 +1012,7 @@ for(i=0;i>16; register unsigned int xalpha=(xpos&0xFFFF)>>9; - dst[i]=(src[xx]*(xalpha^127)+src[xx+1]*xalpha); + dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; xpos+=xInc; } #endif @@ -1068,8 +1068,6 @@ FUNNYUVCODE FUNNYUVCODE FUNNYUVCODE - - "xorl %%eax, %%eax \n\t" // i "movl %6, %%esi \n\t" // src "movl %1, %%edi \n\t" // buf1 @@ -1143,6 +1141,10 @@ register unsigned int xalpha=(xpos&0xFFFF)>>9; dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); +/* slower + dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; + dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; +*/ xpos+=xInc; } #endif @@ -1195,8 +1197,8 @@ if(canMMX2BeUsed) s_xinc+= 20; else s_xinc = ((srcWidth-2)<<16)/(dstw-2) - 20; -if(fullUVIpol && !dstbpp==12) s_xinc2= s_xinc>>1; -else s_xinc2= s_xinc; +if(fullUVIpol && !(dstbpp==12)) s_xinc2= s_xinc>>1; +else s_xinc2= s_xinc; // force calculation of the horizontal interpolation of the first line if(y==0){ @@ -1440,7 +1442,7 @@ #ifdef HAVE_MMX __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS); + __asm __volatile(EMMS:::"memory"); #endif } diff -r 4bc54a0f775f -r a350d8bed636 postproc/swscale_template.c --- a/postproc/swscale_template.c Tue Oct 30 18:45:54 2001 +0000 +++ b/postproc/swscale_template.c Tue Oct 30 20:30:24 2001 +0000 @@ -33,6 +33,7 @@ TODO more intelligent missalignment avoidance for the horizontal scaler +bicubic scaler */ #define ABS(a) ((a) > 0 ? (a) : (-(a))) @@ -912,7 +913,6 @@ unsigned int xpos=0; // *** horizontal scale Y line to temp buffer #ifdef ARCH_X86 - #ifdef HAVE_MMX2 if(canMMX2BeUsed) { @@ -1012,7 +1012,7 @@ for(i=0;i>16; register unsigned int xalpha=(xpos&0xFFFF)>>9; - dst[i]=(src[xx]*(xalpha^127)+src[xx+1]*xalpha); + dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; xpos+=xInc; } #endif @@ -1068,8 +1068,6 @@ FUNNYUVCODE FUNNYUVCODE FUNNYUVCODE - - "xorl %%eax, %%eax \n\t" // i "movl %6, %%esi \n\t" // src "movl %1, %%edi \n\t" // buf1 @@ -1143,6 +1141,10 @@ register unsigned int xalpha=(xpos&0xFFFF)>>9; dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); +/* slower + dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; + dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; +*/ xpos+=xInc; } #endif @@ -1195,8 +1197,8 @@ if(canMMX2BeUsed) s_xinc+= 20; else s_xinc = ((srcWidth-2)<<16)/(dstw-2) - 20; -if(fullUVIpol && !dstbpp==12) s_xinc2= s_xinc>>1; -else s_xinc2= s_xinc; +if(fullUVIpol && !(dstbpp==12)) s_xinc2= s_xinc>>1; +else s_xinc2= s_xinc; // force calculation of the horizontal interpolation of the first line if(y==0){ @@ -1440,7 +1442,7 @@ #ifdef HAVE_MMX __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS); + __asm __volatile(EMMS:::"memory"); #endif }