changeset 1158:71d890b5c13b libavcodec

faster C linear blend & interpolate deinterlacers
author michaelni
date Sat, 29 Mar 2003 14:06:24 +0000
parents 57fe9c4e0c6e
children 13ea348d4cf2
files libpostproc/postprocess_template.c
diffstat 1 files changed, 51 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/libpostproc/postprocess_template.c	Sat Mar 29 13:31:12 2003 +0000
+++ b/libpostproc/postprocess_template.c	Sat Mar 29 14:06:24 2003 +0000
@@ -1562,15 +1562,20 @@
 		: "%eax", "%ecx"
 	);
 #else
-	int x;
+	int a, b, x;
 	src+= 4*stride;
-	for(x=0; x<8; x++)
-	{
-		src[stride]   = (src[0]        + src[stride*2])>>1;
-		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
-		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
-		src[stride*7] = (src[stride*6] + src[stride*8])>>1;
-		src++;
+
+	for(x=0; x<2; x++){
+		a= *(uint32_t*)&src[stride*0];
+		b= *(uint32_t*)&src[stride*2];
+		*(uint32_t*)&src[stride*1]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+		a= *(uint32_t*)&src[stride*4];
+		*(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+		b= *(uint32_t*)&src[stride*6];
+		*(uint32_t*)&src[stride*5]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+		a= *(uint32_t*)&src[stride*8];
+		*(uint32_t*)&src[stride*7]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+		src += 4;
 	}
 #endif
 }
@@ -1875,19 +1880,45 @@
 		: "%eax", "%edx"
 	);
 #else
-	int x;
+	int a, b, c, x;
 	src+= 4*stride;
-	for(x=0; x<8; x++)
-	{
-		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
-		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
-		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
-		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
-		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
-		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
-		src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2;
-		src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2;
-		src++;
+
+	for(x=0; x<2; x++){
+		a= *(uint32_t*)&src[stride*0];
+		b= *(uint32_t*)&src[stride*1];
+		c= *(uint32_t*)&src[stride*2];
+		a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
+		*(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+
+		a= *(uint32_t*)&src[stride*3];
+		b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
+		*(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
+
+		b= *(uint32_t*)&src[stride*4];
+		c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
+		*(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
+
+		c= *(uint32_t*)&src[stride*5];
+		a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
+		*(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+
+		a= *(uint32_t*)&src[stride*6];
+		b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
+		*(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
+
+		b= *(uint32_t*)&src[stride*7];
+		c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
+		*(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
+
+		c= *(uint32_t*)&src[stride*8];
+		a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
+		*(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+
+		a= *(uint32_t*)&src[stride*9];
+		b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
+		*(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
+
+		src += 4;
 	}
 #endif
 }