comparison postproc/swscale_template.c @ 2566:a350d8bed636

bugfixes, small speedup for the non-x86 stuff
author michael
date Tue, 30 Oct 2001 20:30:24 +0000
parents f2e70944d02a
children 30b736e7feef
2565:4bc54a0f775f 2566:a350d8bed636
31 BGR15 & BGR16 MMX versions support dithering 31 BGR15 & BGR16 MMX versions support dithering
32 Special versions: fast Y 1:1 scaling (no interpolation in y direction) 32 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
33 33
34 TODO 34 TODO
35 more intelligent misalignment avoidance for the horizontal scaler 35 more intelligent misalignment avoidance for the horizontal scaler
36 bicubic scaler
36 */ 37 */
37 38
38 #define ABS(a) ((a) > 0 ? (a) : (-(a))) 39 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
39 #define MIN(a,b) ((a) > (b) ? (b) : (a)) 40 #define MIN(a,b) ((a) > (b) ? (b) : (a))
40 #define MAX(a,b) ((a) < (b) ? (b) : (a)) 41 #define MAX(a,b) ((a) < (b) ? (b) : (a))
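Side note on the helper macros above: as plain function-like macros, ABS, MIN and MAX evaluate an argument twice, so an argument with a side effect misbehaves silently. A minimal standalone illustration, not code from the file:

#include <stdio.h>

#define MIN(a,b) ((a) > (b) ? (b) : (a))

int main(void)
{
    int v[]= {3, 9};
    int i= 0;
    /* v[i++] is expanded twice: the comparison reads v[0] (i becomes 1),
       then the false branch re-evaluates v[i++] and returns v[1] (i becomes 2) */
    int m= MIN(v[i++], 4);
    printf("m=%d i=%d\n", m, i);   /* prints m=9 i=2, not the expected m=3 i=1 */
    return 0;
}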
910 { 911 {
911 int i; 912 int i;
912 unsigned int xpos=0; 913 unsigned int xpos=0;
913 // *** horizontal scale Y line to temp buffer 914 // *** horizontal scale Y line to temp buffer
914 #ifdef ARCH_X86 915 #ifdef ARCH_X86
915
916 #ifdef HAVE_MMX2 916 #ifdef HAVE_MMX2
917 if(canMMX2BeUsed) 917 if(canMMX2BeUsed)
918 { 918 {
919 asm volatile( 919 asm volatile(
920 "pxor %%mm7, %%mm7 \n\t" 920 "pxor %%mm7, %%mm7 \n\t"
1010 #endif 1010 #endif
1011 #else 1011 #else
1012 for(i=0;i<dstWidth;i++){ 1012 for(i=0;i<dstWidth;i++){
1013 register unsigned int xx=xpos>>16; 1013 register unsigned int xx=xpos>>16;
1014 register unsigned int xalpha=(xpos&0xFFFF)>>9; 1014 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1015 dst[i]=(src[xx]*(xalpha^127)+src[xx+1]*xalpha); 1015 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1016 xpos+=xInc; 1016 xpos+=xInc;
1017 } 1017 }
1018 #endif 1018 #endif
1019 } 1019 }
1020 1020
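The rewritten non-MMX Y loop above is both the bugfix and the speedup from the commit message: for the 7-bit xalpha used here, xalpha^127 equals 127-xalpha, so the old expression computed src[xx]*(127-xalpha) + src[xx+1]*xalpha with two multiplies and an overall scale of 127, while the new form needs one shift plus one multiply and scales by a clean 128 (the two results differ by exactly src[xx]). A standalone sketch of the two variants; the names mirror the diff but are not taken from the codebase:

#include <stdint.h>

/* old form: two multiplies, weights sum to 127 */
static inline int lerp_old(const uint8_t *src, unsigned xx, unsigned xalpha)
{
    return src[xx]*(xalpha^127) + src[xx+1]*xalpha;
}

/* new form: one shift plus one multiply, result scaled by 128 */
static inline int lerp_new(const uint8_t *src, unsigned xx, unsigned xalpha)
{
    return (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
}

Scaling by a power of two presumably also keeps the later renormalisation an exact shift, which would be the correctness side of this particular change.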
1066 1066
1067 FUNNYUVCODE 1067 FUNNYUVCODE
1068 FUNNYUVCODE 1068 FUNNYUVCODE
1069 FUNNYUVCODE 1069 FUNNYUVCODE
1070 FUNNYUVCODE 1070 FUNNYUVCODE
1071
1072
1073 "xorl %%eax, %%eax \n\t" // i 1071 "xorl %%eax, %%eax \n\t" // i
1074 "movl %6, %%esi \n\t" // src 1072 "movl %6, %%esi \n\t" // src
1075 "movl %1, %%edi \n\t" // buf1 1073 "movl %1, %%edi \n\t" // buf1
1076 "addl $4096, %%edi \n\t" 1074 "addl $4096, %%edi \n\t"
1077 1075
1141 for(i=0;i<dstWidth;i++){ 1139 for(i=0;i<dstWidth;i++){
1142 register unsigned int xx=xpos>>16; 1140 register unsigned int xx=xpos>>16;
1143 register unsigned int xalpha=(xpos&0xFFFF)>>9; 1141 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1144 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); 1142 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1145 dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); 1143 dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
1144 /* slower
1145 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
1146 dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
1147 */
1146 xpos+=xInc; 1148 xpos+=xInc;
1147 } 1149 }
1148 #endif 1150 #endif
1149 } 1151 }
1150 1152
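For the chroma loop the same single-multiply rewrite is only recorded as a commented-out "slower" variant, so the two-multiply form stays. That is a measurement claim, and it is easy to re-check with a throwaway benchmark; the sketch below is hypothetical (buffer sizes, the 16.16 xInc value and the loop shape are assumptions modelled on the code above), not part of the project:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define W    2048
#define RUNS 20000

static uint8_t src1[W+1], src2[W+1];
static volatile int32_t dst[2*W];        /* volatile so the stores survive -O2 */

static double run(int variant)
{
    clock_t t0= clock();
    for(int r=0; r<RUNS; r++){
        unsigned xpos= 0, xInc= 0x8000;  /* assumed 16.16 step (2x upscale) */
        for(int i=0; i<W; i++){
            unsigned xx= xpos>>16;
            unsigned xalpha= (xpos&0xFFFF)>>9;
            if(variant==0){              /* kept two-multiply form */
                dst[i]  = src1[xx]*(xalpha^127) + src1[xx+1]*xalpha;
                dst[i+W]= src2[xx]*(xalpha^127) + src2[xx+1]*xalpha;
            }else{                       /* commented-out single-multiply form */
                dst[i]  = (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
                dst[i+W]= (src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
            }
            xpos+= xInc;
        }
    }
    return (double)(clock()-t0)/CLOCKS_PER_SEC;
}

int main(void)
{
    for(int i=0; i<=W; i++){ src1[i]= (uint8_t)i; src2[i]= (uint8_t)(255-i); }
    printf("two-multiply:    %.2fs\n", run(0));
    printf("single-multiply: %.2fs\n", run(1));
    return 0;
}

Whether the two-multiply form really wins depends on compiler and CPU; on 2001-era compilers the subtract-then-multiply dependency chain plausibly cost more than the second multiply.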
1193 // would be like the vertical one, but that would require some special code for the 1195 // would be like the vertical one, but that would require some special code for the
1194 // first and last pixel 1196 // first and last pixel
1195 if(canMMX2BeUsed) s_xinc+= 20; 1197 if(canMMX2BeUsed) s_xinc+= 20;
1196 else s_xinc = ((srcWidth-2)<<16)/(dstw-2) - 20; 1198 else s_xinc = ((srcWidth-2)<<16)/(dstw-2) - 20;
1197 1199
1198 if(fullUVIpol && !dstbpp==12) s_xinc2= s_xinc>>1; 1200 if(fullUVIpol && !(dstbpp==12)) s_xinc2= s_xinc>>1;
1199 else s_xinc2= s_xinc; 1201 else s_xinc2= s_xinc;
1200 // force calculation of the horizontal interpolation of the first line 1202 // force calculation of the horizontal interpolation of the first line
1201 1203
1202 if(y==0){ 1204 if(y==0){
1203 s_last_ypos=-99; 1205 s_last_ypos=-99;
1204 s_last_y1pos=-99; 1206 s_last_y1pos=-99;
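The change to the fullUVIpol test a few lines up is a plain C precedence fix: '!' binds tighter than '==', so the old !dstbpp==12 parses as (!dstbpp)==12, which is 0 or 1 and therefore never 12, leaving the s_xinc2 halving branch dead. A standalone illustration, not from the codebase:

#include <stdio.h>

int main(void)
{
    int dstbpp;

    dstbpp= 16;                         /* e.g. a BGR16 target */
    printf("%d\n", !dstbpp==12);        /* (!16)==12 -> 0==12 -> 0 (wrong)    */
    printf("%d\n", !(dstbpp==12));      /* !(0)      -> 1        (intended)   */

    dstbpp= 12;                         /* YV12 output */
    printf("%d\n", !dstbpp==12);        /* (!12)==12 -> 0==12 -> 0 */
    printf("%d\n", !(dstbpp==12));      /* !(1)      -> 0 */
    return 0;
}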
1438 #endif 1440 #endif
1439 } 1441 }
1440 1442
1441 #ifdef HAVE_MMX 1443 #ifdef HAVE_MMX
1442 __asm __volatile(SFENCE:::"memory"); 1444 __asm __volatile(SFENCE:::"memory");
1443 __asm __volatile(EMMS); 1445 __asm __volatile(EMMS:::"memory");
1444 #endif 1446 #endif
1445 } 1447 }
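The other fix in this hunk adds a clobber to the EMMS statement: with :::"memory", GCC treats the asm as a compiler barrier and will not cache values in registers or reorder loads/stores across it, matching the SFENCE line just above. A minimal sketch of the pattern, with the instructions spelled out rather than the project's SFENCE/EMMS macros, an invented function name, and assuming an x86 target with GCC-style inline asm:

#if defined(__SSE__)
static void mmx_epilogue(void)
{
    /* sfence orders any non-temporal stores issued by the preceding code;
       emms resets the x87 tag word so FPU code can run again.
       The "memory" clobber keeps the compiler from moving memory
       accesses across either statement. */
    __asm__ __volatile__("sfence" ::: "memory");
    __asm__ __volatile__("emms"   ::: "memory");
}
#endif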
1446 1448
1447 1449
1448 void SwScale_Init(){ 1450 void SwScale_Init(){