changeset 1157:57fe9c4e0c6e libavcodec

Fix clipping in the C deinterlacers; add a 5-tap lowpass deinterlacer
author michaelni
date Sat, 29 Mar 2003 13:31:12 +0000
parents 48efa413ac81
children 71d890b5c13b
files libpostproc/postprocess.c libpostproc/postprocess_internal.h libpostproc/postprocess_template.c
diffstat 3 files changed, 142 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/libpostproc/postprocess.c	Fri Mar 28 14:05:55 2003 +0000
+++ b/libpostproc/postprocess.c	Sat Mar 29 13:31:12 2003 +0000
@@ -113,6 +113,10 @@
 static uint64_t __attribute__((aligned(8))) b80= 		0x8080808080808080LL;
 #endif
 
+
+static uint8_t clip_table[3*256];	// three 256-entry regions, filled in by global_init()
+static uint8_t * const clip_tab= clip_table + 256;	// points at the middle region, so clip_tab[a] stays in bounds for a in [-256, 511]
+
 static int verbose= 0;
 
 static const int deringThreshold= 20;
@@ -133,6 +137,7 @@
 	{"ci", "cubicipoldeint",	1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
 	{"md", "mediandeint", 		1, 1, 4, MEDIAN_DEINT_FILTER},
 	{"fd", "ffmpegdeint", 		1, 1, 4, FFMPEG_DEINT_FILTER},
+	{"l5", "lowpass5", 		1, 1, 4, LOWPASS5_DEINT_FILTER},
 	{"tn", "tmpnoise", 		1, 7, 8, TEMP_NOISE_FILTER},
 	{"fq", "forcequant", 		1, 0, 0, FORCE_QUANT},
 	{NULL, NULL,0,0,0,0} //End Marker
@@ -751,15 +756,25 @@
 		reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
 	}
 
-	reallocAlign((void **)&c->deintTemp, 8, width+16);
+	reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
 	reallocAlign((void **)&c->nonBQPTable, 8, mbWidth*mbHeight*sizeof(QP_STORE_T));
 	reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
 }
 
+static void global_init(void){	// fills clip_table so that clip_tab[a] clamps a in [-256, 511] to 0..255
+	int i;
+	memset(clip_table, 0, 256);	// clip_tab[-256..-1]: negative values clamp to 0
+	for(i=256; i<512; i++)
+		clip_table[i]= i;	// clip_tab[0..255]: identity (the uint8_t store keeps only the low 8 bits of i)
+	memset(clip_table+512, 255, 256);	// clip_tab[256..511]: overflow must saturate to 255, not 0
+}
+
 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
 	PPContext *c= memalign(32, sizeof(PPContext));
 	int stride= (width+15)&(~15); //assumed / will realloc if needed
         
+	global_init();
+
 	memset(c, 0, sizeof(PPContext));
 	c->cpuCaps= cpuCaps;
 	if(cpuCaps&PP_FORMAT){
--- a/libpostproc/postprocess_internal.h	Fri Mar 28 14:05:55 2003 +0000
+++ b/libpostproc/postprocess_internal.h	Sat Mar 29 13:31:12 2003 +0000
@@ -51,6 +51,7 @@
 #define	CUBIC_IPOL_DEINT_FILTER		0x40000	// 262144
 #define	MEDIAN_DEINT_FILTER		0x80000	// 524288
 #define	FFMPEG_DEINT_FILTER		0x400000
+#define	LOWPASS5_DEINT_FILTER		0x800000
 
 #define TEMP_NOISE_FILTER		0x100000
 #define FORCE_QUANT			0x200000
@@ -61,6 +62,17 @@
 //filters on
 //#define COMPILE_TIME_MODE 0x77
 
+#if 1
+static inline int CLIP(int a){	// clip helper used by the C deinterlacers; only clips correctly for a in [-256, 511]
+	if(a&256) return ((a)>>31)^(-1);	// bit 8 set: gives 0 for negative a and -1 (0xFF once stored in a uint8_t) for 256..511; assumes 32-bit int and arithmetic >>
+	else      return a;	// bit 8 clear: a is returned unchanged
+}
+//#define CLIP(a) (((a)&256) ? ((a)>>31)^(-1) : (a))
+#elif 0	// disabled alternative: lookup through clip_tab
+#define CLIP(a) clip_tab[a]
+#else	// disabled alternative: no clipping at all
+#define CLIP(a) (a)
+#endif
 /**
  * Postprocessng filter.
  */
--- a/libpostproc/postprocess_template.c	Fri Mar 28 14:05:55 2003 +0000
+++ b/libpostproc/postprocess_template.c	Sat Mar 29 13:31:12 2003 +0000
@@ -1581,7 +1581,6 @@
  * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too
  * lines 4-12 will be read into the deblocking filter and should be deinterlaced
  * this filter will read lines 3-15 and write 7-13
- * no cliping in C version
  */
 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
 {
@@ -1631,10 +1630,10 @@
 	src+= stride*3;
 	for(x=0; x<8; x++)
 	{
-		src[stride*3] = (-src[0]        + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4;
-		src[stride*5] = (-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4;
-		src[stride*7] = (-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4;
-		src[stride*9] = (-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4;
+		src[stride*3] = CLIP((-src[0]        + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
+		src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
+		src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
+		src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
 		src++;
 	}
 #endif
@@ -1646,7 +1645,6 @@
  * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too
  * lines 4-12 will be read into the deblocking filter and should be deinterlaced
  * this filter will read lines 4-13 and write 5-11
- * no cliping in C version
  */
 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
 {
@@ -1705,13 +1703,13 @@
 		int t1= tmp[x];
 		int t2= src[stride*1];
 
-		src[stride*1]= (-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3;
+		src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3);
 		t1= src[stride*4];
-		src[stride*3]= (-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3;
+		src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3);
 		t2= src[stride*6];
-		src[stride*5]= (-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3;
+		src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3);
 		t1= src[stride*8];
-		src[stride*7]= (-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3;
+		src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
 		tmp[x]= t1;
 
 		src++;
@@ -1720,6 +1718,106 @@
 }
 
 /**
+ * Deinterlaces the given block by filtering every line vertically with a (-1 2 6 2 -1)/8 filter.
+ * will be called for every 8x8 block and can read & write from line 4-15
+ * lines 0-3 have been passed through the deblock / dering filters already, but can be read too
+ * lines 4-12 will be read into the deblocking filter and should be deinterlaced
+ * this filter will read lines 4-13 and write 4-11; on entry tmp / tmp2 supply the two taps above line 4, on exit they hold the unfiltered lines 10 and 11
+ */
+static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
+{
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+	src+= stride*4;
+	asm volatile(
+		"leal (%0, %1), %%eax				\n\t"
+		"leal (%%eax, %1, 4), %%edx			\n\t"
+		"pxor %%mm7, %%mm7				\n\t"
+		"movq (%2), %%mm0				\n\t"
+		"movq (%3), %%mm1				\n\t"
+//	0	1	2	3	4	5	6	7	8	9	10
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1 ecx
+
+#define DEINT_L5(t1,t2,a,b,c)\
+		"movq " #a ", %%mm2				\n\t"\
+		"movq " #b ", %%mm3				\n\t"\
+		"movq " #c ", %%mm4				\n\t"\
+		PAVGB(t2, %%mm3)					\
+		PAVGB(t1, %%mm4)					\
+		"movq %%mm2, %%mm5				\n\t"\
+		"movq %%mm2, " #t1 "				\n\t"\
+		"punpcklbw %%mm7, %%mm2				\n\t"\
+		"punpckhbw %%mm7, %%mm5				\n\t"\
+		"movq %%mm2, %%mm6				\n\t"\
+		"paddw %%mm2, %%mm2				\n\t"\
+		"paddw %%mm6, %%mm2				\n\t"\
+		"movq %%mm5, %%mm6				\n\t"\
+		"paddw %%mm5, %%mm5				\n\t"\
+		"paddw %%mm6, %%mm5				\n\t"\
+		"movq %%mm3, %%mm6				\n\t"\
+		"punpcklbw %%mm7, %%mm3				\n\t"\
+		"punpckhbw %%mm7, %%mm6				\n\t"\
+		"paddw %%mm3, %%mm3				\n\t"\
+		"paddw %%mm6, %%mm6				\n\t"\
+		"paddw %%mm3, %%mm2				\n\t"\
+		"paddw %%mm6, %%mm5				\n\t"\
+		"movq %%mm4, %%mm6				\n\t"\
+		"punpcklbw %%mm7, %%mm4				\n\t"\
+		"punpckhbw %%mm7, %%mm6				\n\t"\
+		"psubw %%mm4, %%mm2				\n\t"\
+		"psubw %%mm6, %%mm5				\n\t"\
+		"psraw $2, %%mm2				\n\t"\
+		"psraw $2, %%mm5				\n\t"\
+		"packuswb %%mm5, %%mm2				\n\t"\
+		"movq %%mm2, " #a "				\n\t"\
+
+DEINT_L5(%%mm0, %%mm1, (%0)          , (%%eax)       , (%%eax, %1)   )
+DEINT_L5(%%mm1, %%mm0, (%%eax)       , (%%eax, %1)   , (%%eax, %1, 2))
+DEINT_L5(%%mm0, %%mm1, (%%eax, %1)   , (%%eax, %1, 2), (%0, %1, 4)   )
+DEINT_L5(%%mm1, %%mm0, (%%eax, %1, 2), (%0, %1, 4)   , (%%edx)       )
+DEINT_L5(%%mm0, %%mm1, (%0, %1, 4)   , (%%edx)       , (%%edx, %1)   )  
+DEINT_L5(%%mm1, %%mm0, (%%edx)       , (%%edx, %1)   , (%%edx, %1, 2))
+DEINT_L5(%%mm0, %%mm1, (%%edx, %1)   , (%%edx, %1, 2), (%0, %1, 8)   )
+DEINT_L5(%%mm1, %%mm0, (%%edx, %1, 2), (%0, %1, 8)   , (%%edx, %1, 4))
+
+		"movq %%mm0, (%2)				\n\t"
+		"movq %%mm1, (%3)				\n\t"
+		: : "r" (src), "r" (stride), "r"(tmp), "r"(tmp2)
+		: "%eax", "%edx"	// NOTE(review): the asm stores through %0, %2 and %3 but lists no "memory" clobber - confirm this is safe
+	);
+#else
+	int x;
+	src+= stride*4;	// everything below is indexed relative to line 4 of the block
+	for(x=0; x<8; x++)	// one column per iteration
+	{
+		int t1= tmp[x];	// unfiltered sample two lines above the first output line
+		int t2= tmp2[x];	// unfiltered sample one line above the first output line
+		int t3= src[0];	// unfiltered sample of the first output line
+
+		src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);	// +4 rounds before the divide by 8
+		t1= src[stride*1];	// each line is saved before it is overwritten; t1/t2/t3 rotate through the three most recent unfiltered lines
+		src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
+		t2= src[stride*2];
+		src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
+		t3= src[stride*3];
+		src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
+		t1= src[stride*4];
+		src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
+		t2= src[stride*5];
+		src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
+		t3= src[stride*6];
+		src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
+		t1= src[stride*7];
+		src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);
+
+		tmp[x]= t3;	// unfiltered src line 6 of this column
+		tmp2[x]= t1;	// unfiltered src line 7 of this column
+
+		src++;
+	}
+#endif
+}
+
+/**
  * Deinterlaces the given block by filtering all lines with a (1 2 1) filter.
  * will be called for every 8x8 block and can read & write from line 4-15
  * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too
@@ -2696,7 +2794,8 @@
 
 	if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
 	else if(   (mode & LINEAR_BLEND_DEINT_FILTER)
-		|| (mode & FFMPEG_DEINT_FILTER)) copyAhead=14;
+		|| (mode & FFMPEG_DEINT_FILTER)
+		|| (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14;
 	else if(   (mode & V_DEBLOCK)
 		|| (mode & LINEAR_IPOL_DEINT_FILTER)
 		|| (mode & MEDIAN_DEINT_FILTER)) copyAhead=13;
@@ -2832,6 +2931,8 @@
 				RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
 			else if(mode & FFMPEG_DEINT_FILTER)
 				RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
+			else if(mode & LOWPASS5_DEINT_FILTER)
+				RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
 /*			else if(mode & CUBIC_BLEND_DEINT_FILTER)
 				RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
 */
@@ -2974,6 +3075,8 @@
 				RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
 			else if(mode & FFMPEG_DEINT_FILTER)
 				RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
+			else if(mode & LOWPASS5_DEINT_FILTER)
+				RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
 /*			else if(mode & CUBIC_BLEND_DEINT_FILTER)
 				RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
 */