changeset 2575:37da7219ebaf

c optimizations
author michael
date Tue, 30 Oct 2001 22:03:25 +0000
parents cc926eda63cd
children 437ed06579d8
files postproc/swscale.c postproc/swscale_template.c
diffstat 2 files changed, 136 insertions(+), 34 deletions(-) [+]
line wrap: on
line diff
--- a/postproc/swscale.c	Tue Oct 30 21:55:28 2001 +0000
+++ b/postproc/swscale.c	Tue Oct 30 22:03:25 2001 +0000
@@ -788,45 +788,96 @@
 //FIXME unroll C loop and dont recalculate UV
 		asm volatile ("\n\t"::: "memory");
 
-		if(dstbpp==32 || dstbpp==24)
+		if(dstbpp==32)
 		{
-			for(i=0;i<dstw;i++){
+			for(i=0; i<dstw-1; i+=2){
 				// vertical linear interpolation && yuv2rgb in a single step:
-				int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
 				int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
 				int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
-				dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
-				dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
-				dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
-				dest+=dstbpp>>3;
+
+				int Cb= yuvtab_40cf[U];
+				int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+				int Cr= yuvtab_3343[V];
+
+				dest[4*i+0]=clip_table[((Y1 + Cb) >>13)];
+				dest[4*i+1]=clip_table[((Y1 + Cg) >>13)];
+				dest[4*i+2]=clip_table[((Y1 + Cr) >>13)];
+
+				dest[4*i+4]=clip_table[((Y2 + Cb) >>13)];
+				dest[4*i+5]=clip_table[((Y2 + Cg) >>13)];
+				dest[4*i+6]=clip_table[((Y2 + Cr) >>13)];
+			}
+		}
+		if(dstbpp==24)
+		{
+			for(i=0; i<dstw-1; i+=2){
+				// vertical linear interpolation && yuv2rgb in a single step:
+				int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
+				int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
+				int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
+
+				int Cb= yuvtab_40cf[U];
+				int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+				int Cr= yuvtab_3343[V];
+
+				dest[0]=clip_table[((Y1 + Cb) >>13)];
+				dest[1]=clip_table[((Y1 + Cg) >>13)];
+				dest[2]=clip_table[((Y1 + Cr) >>13)];
+
+				dest[3]=clip_table[((Y2 + Cb) >>13)];
+				dest[4]=clip_table[((Y2 + Cg) >>13)];
+				dest[5]=clip_table[((Y2 + Cr) >>13)];
+				dest+=6;
 			}
 		}
 		else if(dstbpp==16)
 		{
-			for(i=0;i<dstw;i++){
+			for(i=0; i<dstw-1; i+=2){
 				// vertical linear interpolation && yuv2rgb in a single step:
-				int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
 				int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
 				int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
 
+				int Cb= yuvtab_40cf[U];
+				int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+				int Cr= yuvtab_3343[V];
+
 				((uint16_t*)dest)[i] =
-					(clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
-					((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) |
-					((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800);
+					(clip_table[(Y1 + Cb) >>13]>>3) |
+					((clip_table[(Y1 + Cg) >>13]<<3)&0x07E0) |
+					((clip_table[(Y1 + Cr) >>13]<<8)&0xF800);
+
+				((uint16_t*)dest)[i+1] =
+					(clip_table[(Y2 + Cb) >>13]>>3) |
+					((clip_table[(Y2 + Cg) >>13]<<3)&0x07E0) |
+					((clip_table[(Y2 + Cr) >>13]<<8)&0xF800);
 			}
 		}
 		else if(dstbpp==15)
 		{
-			for(i=0;i<dstw;i++){
+			for(i=0; i<dstw-1; i+=2){
 				// vertical linear interpolation && yuv2rgb in a single step:
-				int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
 				int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
 				int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
 
+				int Cb= yuvtab_40cf[U];
+				int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+				int Cr= yuvtab_3343[V];
+
 				((uint16_t*)dest)[i] =
-					(clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
-					((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) |
-					((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00);
+					(clip_table[(Y1 + Cb) >>13]>>3) |
+					((clip_table[(Y1 + Cg) >>13]<<2)&0x03E0) |
+					((clip_table[(Y1 + Cr) >>13]<<7)&0x7C00);
+				((uint16_t*)dest)[i+1] =
+					(clip_table[(Y2 + Cb) >>13]>>3) |
+					((clip_table[(Y2 + Cg) >>13]<<2)&0x03E0) |
+					((clip_table[(Y2 + Cr) >>13]<<7)&0x7C00);
 			}
 		}
 #endif
--- a/postproc/swscale_template.c	Tue Oct 30 21:55:28 2001 +0000
+++ b/postproc/swscale_template.c	Tue Oct 30 22:03:25 2001 +0000
@@ -788,45 +788,96 @@
 //FIXME unroll C loop and dont recalculate UV
 		asm volatile ("\n\t"::: "memory");
 
-		if(dstbpp==32 || dstbpp==24)
+		if(dstbpp==32)
 		{
-			for(i=0;i<dstw;i++){
+			for(i=0; i<dstw-1; i+=2){
 				// vertical linear interpolation && yuv2rgb in a single step:
-				int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
 				int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
 				int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
-				dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
-				dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
-				dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
-				dest+=dstbpp>>3;
+
+				int Cb= yuvtab_40cf[U];
+				int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+				int Cr= yuvtab_3343[V];
+
+				dest[4*i+0]=clip_table[((Y1 + Cb) >>13)];
+				dest[4*i+1]=clip_table[((Y1 + Cg) >>13)];
+				dest[4*i+2]=clip_table[((Y1 + Cr) >>13)];
+
+				dest[4*i+4]=clip_table[((Y2 + Cb) >>13)];
+				dest[4*i+5]=clip_table[((Y2 + Cg) >>13)];
+				dest[4*i+6]=clip_table[((Y2 + Cr) >>13)];
+			}
+		}
+		if(dstbpp==24)
+		{
+			for(i=0; i<dstw-1; i+=2){
+				// vertical linear interpolation && yuv2rgb in a single step:
+				int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
+				int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
+				int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
+
+				int Cb= yuvtab_40cf[U];
+				int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+				int Cr= yuvtab_3343[V];
+
+				dest[0]=clip_table[((Y1 + Cb) >>13)];
+				dest[1]=clip_table[((Y1 + Cg) >>13)];
+				dest[2]=clip_table[((Y1 + Cr) >>13)];
+
+				dest[3]=clip_table[((Y2 + Cb) >>13)];
+				dest[4]=clip_table[((Y2 + Cg) >>13)];
+				dest[5]=clip_table[((Y2 + Cr) >>13)];
+				dest+=6;
 			}
 		}
 		else if(dstbpp==16)
 		{
-			for(i=0;i<dstw;i++){
+			for(i=0; i<dstw-1; i+=2){
 				// vertical linear interpolation && yuv2rgb in a single step:
-				int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
 				int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
 				int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
 
+				int Cb= yuvtab_40cf[U];
+				int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+				int Cr= yuvtab_3343[V];
+
 				((uint16_t*)dest)[i] =
-					(clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
-					((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) |
-					((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800);
+					(clip_table[(Y1 + Cb) >>13]>>3) |
+					((clip_table[(Y1 + Cg) >>13]<<3)&0x07E0) |
+					((clip_table[(Y1 + Cr) >>13]<<8)&0xF800);
+
+				((uint16_t*)dest)[i+1] =
+					(clip_table[(Y2 + Cb) >>13]>>3) |
+					((clip_table[(Y2 + Cg) >>13]<<3)&0x07E0) |
+					((clip_table[(Y2 + Cr) >>13]<<8)&0xF800);
 			}
 		}
 		else if(dstbpp==15)
 		{
-			for(i=0;i<dstw;i++){
+			for(i=0; i<dstw-1; i+=2){
 				// vertical linear interpolation && yuv2rgb in a single step:
-				int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+				int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
 				int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
 				int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
 
+				int Cb= yuvtab_40cf[U];
+				int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+				int Cr= yuvtab_3343[V];
+
 				((uint16_t*)dest)[i] =
-					(clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
-					((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) |
-					((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00);
+					(clip_table[(Y1 + Cb) >>13]>>3) |
+					((clip_table[(Y1 + Cg) >>13]<<2)&0x03E0) |
+					((clip_table[(Y1 + Cr) >>13]<<7)&0x7C00);
+				((uint16_t*)dest)[i+1] =
+					(clip_table[(Y2 + Cb) >>13]>>3) |
+					((clip_table[(Y2 + Cg) >>13]<<2)&0x03E0) |
+					((clip_table[(Y2 + Cr) >>13]<<7)&0x7C00);
 			}
 		}
 #endif