libavcodec.hg: libpostproc/postprocess

comparison libpostproc/postprocess_template.c @ 106:389391a6d0bf libavcodec

rewrote the horizontal lowpass filter to fix a bug which caused a blocky look; added deinterlace filters (linear interpolate, linear blend, median); minor cleanups (removed some commented-out stuff)

author	michael
date	Mon, 15 Oct 2001 03:01:08 +0000
parents	a2f94bfb5793
children	bd163e13a0fb

comparison

equal deleted inserted replaced

-:a2f94bfb5793
+:389391a6d0bf
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 /*
-			C	MMX	MMX2	3DNow*
+			C	MMX	MMX2	3DNow
 isVertDC		Ec	Ec
 isVertMinMaxOk		Ec	Ec
-doVertLowPass		E		e	e*
+doVertLowPass		E		e	e
 doVertDefFilter		Ec	Ec	Ec
 isHorizDC		Ec	Ec
 isHorizMinMaxOk		a
-doHorizLowPass		E		a	a*
+doHorizLowPass		E		a	a
 doHorizDefFilter	E	ac	ac
 deRing
-Vertical RKAlgo1	E		a	a*
+Vertical RKAlgo1	E		a	a
-Vertical X1		a		E	E*
+Vertical X1		a		E	E
-Horizontal X1		a		E	E*
+Horizontal X1		a		E	E
+LinIpolDeinterlace	a		E	E*
+LinBlendDeinterlace	a		E	E*
+MedianDeinterlace	a		E
 * I don't have a 3DNow CPU -> it's untested
 E = Exact implementation
 e = almost exact implementation
 make the mainloop more flexible (variable number of blocks at once
 	(the if/else stuff per block is slowing things down)
 compare the quality & speed of all filters
 implement a few simple deinterlacing filters
 split this huge file
+fix warnings (unused vars, ...)
 ...
 Notes:
 */
 /*
 Changelog: use the CVS log
+rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
+added deinterlace filters (linear interpolate, linear blend, median)
+minor cleanups (removed some outcommented stuff)
 0.1.3
 	bugfixes: last 3 lines not brightness/contrast corrected
 		brightness statistics messed up with initial black pic
 	changed initial values of the brightness statistics
 	C++ -> C conversion
 //FIXME? |255-0| = 1 (shouldnt be a problem ...)
 /**
 * Check if the middle 8x8 Block in the given 8x10 block is flat
 */
 static inline int isVertDC(uint8_t src[], int stride){
-//	return true;
 	int numEq= 0;
 	int y;
 	src+= stride; // src points to begin of the 8x8 Block
 #ifdef HAVE_MMX
 	asm volatile(
-//		"int $3 \n\t"
 		"pushl %1\n\t"
 		"movq b7E, %%mm7					\n\t" // mm7 = 0x7F
 		"movq b7C, %%mm6					\n\t" // mm6 = 0x7D
 		"movq (%1), %%mm0				\n\t"
 		"addl %2, %1					\n\t"
 	}
 #endif
 }
 /**
-* Do a horizontal low pass filter on the 8x8 block
+* Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
-* useing approximately the 7-Tap Filter (1,2,3,4,3,2,1)/16 (MMX2/3DNOW version)
+* using the 7-Tap Filter   (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version)
 */
 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP)
 {
 //return;
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 		"punpcklbw %%mm2, %%mm2					\n\t"\
 		"movq %%mm2, (%0)					\n\t"\
 */
 // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16
 /*
-31
-121
-121
-121
-121
-121
-121
-13
 Implemented	Exact 7-Tap
 9421		A321
 36421		64321
 334321		=
 1234321	=
 123433	=
 12463	  12346
 1249	   123A
 */
 #ifdef HAVE_MMX2
 #define HLP3(i)	"movq " #i "(%%eax), %%mm0				\n\t"\
 		"movq %%mm0, %%mm1					\n\t"\
 		"movq %%mm0, %%mm2					\n\t"\
 		"movq %%mm0, %%mm3					\n\t"\
 		"movd %%mm0, 4(%0)					\n\t"
 #else
 #define HLP3(i)	"movq " #i "(%%eax), %%mm0				\n\t"\
 		"movq %%mm0, %%mm1					\n\t"\
 		"movq %%mm0, %%mm2					\n\t"\
-		"movq %%mm0, %%mm3					\n\t"\
+		"movd -4(%0), %%mm3					\n\t" /*0001000*/\
-		"movq %%mm0, %%mm4					\n\t"\
+		"movd 8(%0), %%mm4					\n\t" /*0001000*/\
 		"psllq $8, %%mm1					\n\t"\
 		"psrlq $8, %%mm2					\n\t"\
-		"pand bm00000001, %%mm3					\n\t"\
+		"psrlq $24, %%mm3					\n\t"\
-		"pand bm10000000, %%mm4					\n\t"\
+		"psllq $56, %%mm4					\n\t"\
 		"por %%mm3, %%mm1					\n\t"\
 		"por %%mm4, %%mm2					\n\t"\
 		PAVGB(%%mm2, %%mm1)\
 		PAVGB(%%mm1, %%mm0)\
 \
 		"movd %%mm0, (%0)					\n\t"\
 		"psrlq $32, %%mm0					\n\t"\
 		"movd %%mm0, 4(%0)					\n\t"
 #endif
-#define HLP(i) HLP3(i)
+/* uses the 7-Tap Filter: 1112111 */
+#define NEW_HLP(i)\
+		"movq " #i "(%%eax), %%mm0				\n\t"\
+		"movq %%mm0, %%mm1					\n\t"\
+		"movq %%mm0, %%mm2					\n\t"\
+		"movd -4(%0), %%mm3					\n\t" /*0001000*/\
+		"movd 8(%0), %%mm4					\n\t" /*0001000*/\
+		"psllq $8, %%mm1					\n\t"\
+		"psrlq $8, %%mm2					\n\t"\
+		"psrlq $24, %%mm3					\n\t"\
+		"psllq $56, %%mm4					\n\t"\
+		"por %%mm3, %%mm1					\n\t"\
+		"por %%mm4, %%mm2					\n\t"\
+		"movq %%mm1, %%mm5					\n\t"\
+		PAVGB(%%mm2, %%mm1)\
+		PAVGB(%%mm1, %%mm0)\
+		"psllq $8, %%mm5					\n\t"\
+		"psrlq $8, %%mm2					\n\t"\
+		"por %%mm3, %%mm5					\n\t"\
+		"por %%mm4, %%mm2					\n\t"\
+		"movq %%mm5, %%mm1					\n\t"\
+		PAVGB(%%mm2, %%mm5)\
+		"psllq $8, %%mm1					\n\t"\
+		"psrlq $8, %%mm2					\n\t"\
+		"por %%mm3, %%mm1					\n\t"\
+		"por %%mm4, %%mm2					\n\t"\
+		PAVGB(%%mm2, %%mm1)\
+		PAVGB(%%mm1, %%mm5)\
+		PAVGB(%%mm5, %%mm0)\
+		"movd %%mm0, (%0)					\n\t"\
+		"psrlq $32, %%mm0					\n\t"\
+		"movd %%mm0, 4(%0)					\n\t"
+/* uses the 9-Tap Filter: 112242211 */
+#define NEW_HLP2(i)\
+		"movq " #i "(%%eax), %%mm0				\n\t" /*0001000*/\
+		"movq %%mm0, %%mm1					\n\t" /*0001000*/\
+		"movq %%mm0, %%mm2					\n\t" /*0001000*/\
+		"movd -4(%0), %%mm3					\n\t" /*0001000*/\
+		"movd 8(%0), %%mm4					\n\t" /*0001000*/\
+		"psllq $8, %%mm1					\n\t"\
+		"psrlq $8, %%mm2					\n\t"\
+		"psrlq $24, %%mm3					\n\t"\
+		"psllq $56, %%mm4					\n\t"\
+		"por %%mm3, %%mm1					\n\t" /*0010000*/\
+		"por %%mm4, %%mm2					\n\t" /*0000100*/\
+		"movq %%mm1, %%mm5					\n\t" /*0010000*/\
+		PAVGB(%%mm2, %%mm1)					      /*0010100*/\
+		PAVGB(%%mm1, %%mm0)					      /*0012100*/\
+		"psllq $8, %%mm5					\n\t"\
+		"psrlq $8, %%mm2					\n\t"\
+		"por %%mm3, %%mm5					\n\t" /*0100000*/\
+		"por %%mm4, %%mm2					\n\t" /*0000010*/\
+		"movq %%mm5, %%mm1					\n\t" /*0100000*/\
+		PAVGB(%%mm2, %%mm5)					      /*0100010*/\
+		"psllq $8, %%mm1					\n\t"\
+		"psrlq $8, %%mm2					\n\t"\
+		"por %%mm3, %%mm1					\n\t" /*1000000*/\
+		"por %%mm4, %%mm2					\n\t" /*0000001*/\
+		"movq %%mm1, %%mm6					\n\t" /*1000000*/\
+		PAVGB(%%mm2, %%mm1)					      /*1000001*/\
+		"psllq $8, %%mm6					\n\t"\
+		"psrlq $8, %%mm2					\n\t"\
+		"por %%mm3, %%mm6					\n\t"/*100000000*/\
+		"por %%mm4, %%mm2					\n\t"/*000000001*/\
+		PAVGB(%%mm2, %%mm6)					     /*100000001*/\
+		PAVGB(%%mm6, %%mm1)					     /*110000011*/\
+		PAVGB(%%mm1, %%mm5)					     /*112000211*/\
+		PAVGB(%%mm5, %%mm0)					     /*112242211*/\
+		"movd %%mm0, (%0)					\n\t"\
+		"psrlq $32, %%mm0					\n\t"\
+		"movd %%mm0, 4(%0)					\n\t"
+#define HLP(i) NEW_HLP(i)
 		HLP(0)
 		"addl %1, %0						\n\t"
 		HLP(8)
 		"addl %1, %0						\n\t"
 //FIXME
 #endif
 }
+/**
+* Deinterlaces the given block by linear interpolation:
+* each odd line (1, 3, 5, 7) is overwritten with the average of the lines directly above and
+* below it, the even lines are left untouched
+* will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
+* (line 8, the first line below the 8x8 block, is read to interpolate line 7)
+* src points to the first pixel of the block, stride is the offset between two consecutive lines
+* NOTE(review): PAVGB is defined outside of this block; presumably it averages its two operands
+* bytewise into the second one - confirm against the macro definition
+*/
+static inline void deInterlaceInterpolateLinear(uint8_t src[], int stride)
+{
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+	asm volatile(
+		"leal (%0, %1), %%eax				\n\t" // eax = address of line 1
+		"leal (%%eax, %1, 4), %%ebx			\n\t" // ebx = address of line 5
+//	0	1	2	3	4	5	6	7	8	9
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
+		"movq (%0), %%mm0				\n\t" // L0
+		"movq (%%eax, %1), %%mm1			\n\t" // L2
+		PAVGB(%%mm1, %%mm0)\
+		"movq %%mm0, (%%eax)				\n\t" // line 1 <- L0, L2
+		"movq (%0, %1, 4), %%mm0			\n\t" // L4
+		PAVGB(%%mm0, %%mm1)\
+		"movq %%mm1, (%%eax, %1, 2)			\n\t" // line 3 <- L2, L4
+		"movq (%%ebx, %1), %%mm1			\n\t" // L6
+		PAVGB(%%mm1, %%mm0)\
+		"movq %%mm0, (%%ebx)				\n\t" // line 5 <- L4, L6
+		"movq (%0, %1, 8), %%mm0			\n\t" // L8
+		PAVGB(%%mm0, %%mm1)\
+		"movq %%mm1, (%%ebx, %1, 2)			\n\t" // line 7 <- L6, L8
+		: : "r" (src), "r" (stride)
+		: "%eax", "%ebx"
+	);
+#else
+	int x;
+	// plain C version: walk over the 8 columns of the block, one pixel per line at a time
+	for(x=0; x<8; x++)
+	{
+		// every odd line becomes the truncated mean of its two even neighbours
+		src[stride]   = (src[0]        + src[stride*2])>>1;
+		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
+		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
+		src[stride*7] = (src[stride*6] + src[stride*8])>>1; // reads line 8 (below the 8x8 block)
+		src++;
+	}
+#endif
+}
+/**
+* Deinterlaces the given block by linear interpolation (variant for the last row of blocks):
+* lines 1, 3 and 5 are overwritten with the average of the lines directly above and below them,
+* line 7 has no line below it inside the block and is replaced by a copy of line 6
+* will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
+* src points to the first pixel of the block, stride is the offset between two consecutive lines
+* NOTE(review): PAVGB is defined outside of this block; presumably it averages its two operands
+* bytewise into the second one - confirm against the macro definition
+*/
+static inline void deInterlaceInterpolateLinearLastRow(uint8_t src[], int stride)
+{
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+	asm volatile(
+		"leal (%0, %1), %%eax				\n\t" // eax = address of line 1
+		"leal (%%eax, %1, 4), %%ebx			\n\t" // ebx = address of line 5
+//	0	1	2	3	4	5	6	7	8	9
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
+		"movq (%0), %%mm0				\n\t" // L0
+		"movq (%%eax, %1), %%mm1			\n\t" // L2
+		PAVGB(%%mm1, %%mm0)\
+		"movq %%mm0, (%%eax)				\n\t" // line 1 <- L0, L2
+		"movq (%0, %1, 4), %%mm0			\n\t" // L4
+		PAVGB(%%mm0, %%mm1)\
+		"movq %%mm1, (%%eax, %1, 2)			\n\t" // line 3 <- L2, L4
+		"movq (%%ebx, %1), %%mm1			\n\t" // L6
+		PAVGB(%%mm1, %%mm0)\
+		"movq %%mm0, (%%ebx)				\n\t" // line 5 <- L4, L6
+		"movq %%mm1, (%%ebx, %1, 2)			\n\t" // line 7 <- copy of L6 (line 8 is not touched)
+		: : "r" (src), "r" (stride)
+		: "%eax", "%ebx"
+	);
+#else
+	int x;
+	// plain C version: walk over the 8 columns of the block, one pixel per line at a time
+	for(x=0; x<8; x++)
+	{
+		src[stride]   = (src[0]        + src[stride*2])>>1;
+		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
+		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
+		src[stride*7] = src[stride*6]; // no line 8 available here -> duplicate line 6
+		src++;
+	}
+#endif
+}
+/**
+* Deinterlaces the given block by blending each line with its neighbours:
+* output line n is built from the input lines n, n+1 and n+2 with the weights (1,2,1)/4
+* will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
+* (lines 8 and 9, below the 8x8 block, are read for the output lines 6 and 7)
+* will shift the image up by 1 line (FIXME if this is a problem)
+* src points to the first pixel of the block, stride is the offset between two consecutive lines
+* the filter works in place; every output line only depends on lines at or below itself, which
+* have not been overwritten yet when it is computed
+* NOTE(review): PAVGB is defined outside of this block; presumably it averages its two operands
+* bytewise into the second one - confirm against the macro definition
+*/
+static inline void deInterlaceBlendLinear(uint8_t src[], int stride)
+{
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+	asm volatile(
+		"leal (%0, %1), %%eax				\n\t" // eax = address of line 1
+		"leal (%%eax, %1, 4), %%ebx			\n\t" // ebx = address of line 5
+//	0	1	2	3	4	5	6	7	8	9
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
+		"movq (%0), %%mm0				\n\t" // L0
+		"movq (%%eax, %1), %%mm1			\n\t" // L2
+		PAVGB(%%mm1, %%mm0)				      // L0+L2
+		"movq (%%eax), %%mm2				\n\t" // L1
+		PAVGB(%%mm2, %%mm0)				      // 2L1 + L0 + L2
+		"movq %%mm0, (%0)				\n\t" // -> line 0
+		"movq (%%eax, %1, 2), %%mm0			\n\t" // L3
+		PAVGB(%%mm0, %%mm2)				      // L1+L3
+		PAVGB(%%mm1, %%mm2)				      // 2L2 + L1 + L3
+		"movq %%mm2, (%%eax)				\n\t" // -> line 1
+		"movq (%0, %1, 4), %%mm2			\n\t" // L4
+		PAVGB(%%mm2, %%mm1)				      // L2+L4
+		PAVGB(%%mm0, %%mm1)				      // 2L3 + L2 + L4
+		"movq %%mm1, (%%eax, %1)			\n\t" // -> line 2
+		"movq (%%ebx), %%mm1				\n\t" // L5
+		PAVGB(%%mm1, %%mm0)				      // L3+L5
+		PAVGB(%%mm2, %%mm0)				      // 2L4 + L3 + L5
+		"movq %%mm0, (%%eax, %1, 2)			\n\t" // -> line 3
+		"movq (%%ebx, %1), %%mm0			\n\t" // L6
+		PAVGB(%%mm0, %%mm2)				      // L4+L6
+		PAVGB(%%mm1, %%mm2)				      // 2L5 + L4 + L6
+		"movq %%mm2, (%0, %1, 4)			\n\t" // -> line 4
+		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
+		PAVGB(%%mm2, %%mm1)				      // L5+L7
+		PAVGB(%%mm0, %%mm1)				      // 2L6 + L5 + L7
+		"movq %%mm1, (%%ebx)				\n\t" // -> line 5
+		"movq (%0, %1, 8), %%mm1			\n\t" // L8
+		PAVGB(%%mm1, %%mm0)				      // L6+L8
+		PAVGB(%%mm2, %%mm0)				      // 2L7 + L6 + L8
+		"movq %%mm0, (%%ebx, %1)			\n\t" // -> line 6
+		"movq (%%ebx, %1, 4), %%mm0			\n\t" // L9
+		PAVGB(%%mm0, %%mm2)				      // L7+L9
+		PAVGB(%%mm1, %%mm2)				      // 2L8 + L7 + L9
+		"movq %%mm2, (%%ebx, %1, 2)			\n\t" // -> line 7
+		: : "r" (src), "r" (stride)
+		: "%eax", "%ebx"
+	);
+#else
+	int x;
+	// plain C version: walk over the 8 columns of the block, one pixel per line at a time
+	for(x=0; x<8; x++)
+	{
+		// line n <- (L[n] + 2*L[n+1] + L[n+2])/4, computed top to bottom
+		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
+		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
+		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
+		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
+		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
+		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
+		src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2;
+		src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2;
+		src++;
+	}
+#endif
+}
+/**
+* Deinterlaces the given block by blending each line with its neighbours
+* (variant for the last row of blocks):
+* output lines 0..5 are built from the input lines n, n+1 and n+2 with the weights (1,2,1)/4,
+* output lines 6 and 7 both become the average of the input lines 6 and 7, as there is
+* nothing below line 7 that could be read
+* will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
+* will shift the image up by 1 line (FIXME if this is a problem)
+* src points to the first pixel of the block, stride is the offset between two consecutive lines
+* NOTE(review): PAVGB is defined outside of this block; presumably it averages its two operands
+* bytewise into the second one - confirm against the macro definition
+*/
+static inline void deInterlaceBlendLinearLastRow(uint8_t src[], int stride)
+{
+// was HAVE_MMSX2 (typo), which made MMX2-only builds silently fall back to the C code
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+	asm volatile(
+		"leal (%0, %1), %%eax				\n\t" // eax = address of line 1
+		"leal (%%eax, %1, 4), %%ebx			\n\t" // ebx = address of line 5
+//	0	1	2	3	4	5	6	7	8	9
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
+		"movq (%0), %%mm0				\n\t" // L0
+		"movq (%%eax, %1), %%mm1			\n\t" // L2
+		PAVGB(%%mm1, %%mm0)				      // L0+L2
+		"movq (%%eax), %%mm2				\n\t" // L1
+		PAVGB(%%mm2, %%mm0)				      // 2L1 + L0 + L2
+		"movq %%mm0, (%0)				\n\t" // -> line 0
+		"movq (%%eax, %1, 2), %%mm0			\n\t" // L3
+		PAVGB(%%mm0, %%mm2)				      // L1+L3
+		PAVGB(%%mm1, %%mm2)				      // 2L2 + L1 + L3
+		"movq %%mm2, (%%eax)				\n\t" // -> line 1
+		"movq (%0, %1, 4), %%mm2			\n\t" // L4
+		PAVGB(%%mm2, %%mm1)				      // L2+L4
+		PAVGB(%%mm0, %%mm1)				      // 2L3 + L2 + L4
+		"movq %%mm1, (%%eax, %1)			\n\t" // -> line 2
+		"movq (%%ebx), %%mm1				\n\t" // L5
+		PAVGB(%%mm1, %%mm0)				      // L3+L5
+		PAVGB(%%mm2, %%mm0)				      // 2L4 + L3 + L5
+		"movq %%mm0, (%%eax, %1, 2)			\n\t" // -> line 3
+		"movq (%%ebx, %1), %%mm0			\n\t" // L6
+		PAVGB(%%mm0, %%mm2)				      // L4+L6
+		PAVGB(%%mm1, %%mm2)				      // 2L5 + L4 + L6
+		"movq %%mm2, (%0, %1, 4)			\n\t" // -> line 4
+		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
+		PAVGB(%%mm2, %%mm1)				      // L5+L7
+		PAVGB(%%mm0, %%mm1)				      // 2L6 + L5 + L7
+		"movq %%mm1, (%%ebx)				\n\t" // -> line 5
+		PAVGB(%%mm2, %%mm0)				      // L6 + L7 (mm0 still holds L6, mm2 holds L7)
+		"movq %%mm0, (%%ebx, %1)			\n\t" // -> line 6
+		"movq %%mm0, (%%ebx, %1, 2)			\n\t" // -> line 7 (same value as line 6)
+		: : "r" (src), "r" (stride)
+		: "%eax", "%ebx"
+	);
+#else
+	int x;
+	// plain C version: walk over the 8 columns of the block, one pixel per line at a time
+	for(x=0; x<8; x++)
+	{
+		// line n <- (L[n] + 2*L[n+1] + L[n+2])/4, computed top to bottom
+		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
+		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
+		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
+		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
+		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
+		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
+		// lines 8 and 9 do not exist for the last row: only blend the lines 6 and 7 ...
+		src[stride*6] = (src[stride*6] +   src[stride*7])>>1;
+		// ... and duplicate the result into line 7
+		src[stride*7] = src[stride*6];
+		src++;
+	}
+#endif
+}
+/**
+* Deinterlaces the given block with a vertical 3-tap median filter:
+* each odd line (1, 3, 5, 7) is overwritten with the bytewise median of itself and the lines
+* directly above and below it, the even lines are left untouched
+* will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
+* (line 8, the first line below the 8x8 block, is read for the median of line 7)
+* src points to the first pixel of the block, stride is the offset between two consecutive lines
+* the C version computes the same median as the MMX2 code (it used to be a copy of the
+* linear blend filter)
+*/
+static inline void deInterlaceMedian(uint8_t src[], int stride)
+{
+#if defined (HAVE_MMX2)
+	asm volatile(
+		"leal (%0, %1), %%eax				\n\t" // eax = address of line 1
+		"leal (%%eax, %1, 4), %%ebx			\n\t" // ebx = address of line 5
+//	0	1	2	3	4	5	6	7	8	9
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
+		"movq (%0), %%mm0				\n\t" // L0
+		"movq (%%eax, %1), %%mm2			\n\t" // L2
+		"movq (%%eax), %%mm1				\n\t" // L1
+		"movq %%mm0, %%mm3				\n\t"
+		"pmaxub %%mm1, %%mm0				\n\t" // max(L0, L1)
+		"pminub %%mm3, %%mm1				\n\t" // min(L0, L1)
+		"pmaxub %%mm2, %%mm1				\n\t" // max(L2, min(L0, L1))
+		"pminub %%mm1, %%mm0				\n\t" // median(L0, L1, L2)
+		"movq %%mm0, (%%eax)				\n\t" // -> line 1
+		"movq (%0, %1, 4), %%mm0			\n\t" // L4
+		"movq (%%eax, %1, 2), %%mm1			\n\t" // L3
+		"movq %%mm2, %%mm3				\n\t"
+		"pmaxub %%mm1, %%mm2				\n\t" // max(L2, L3)
+		"pminub %%mm3, %%mm1				\n\t" // min(L2, L3)
+		"pmaxub %%mm0, %%mm1				\n\t" // max(L4, min(L2, L3))
+		"pminub %%mm1, %%mm2				\n\t" // median(L2, L3, L4)
+		"movq %%mm2, (%%eax, %1, 2)			\n\t" // -> line 3
+		"movq (%%ebx), %%mm2				\n\t" // L5
+		"movq (%%ebx, %1), %%mm1			\n\t" // L6
+		"movq %%mm2, %%mm3				\n\t"
+		"pmaxub %%mm0, %%mm2				\n\t" // max(L4, L5)
+		"pminub %%mm3, %%mm0				\n\t" // min(L4, L5)
+		"pmaxub %%mm1, %%mm0				\n\t" // max(L6, min(L4, L5))
+		"pminub %%mm0, %%mm2				\n\t" // median(L4, L5, L6)
+		"movq %%mm2, (%%ebx)				\n\t" // -> line 5
+		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
+		"movq (%0, %1, 8), %%mm0			\n\t" // L8
+		"movq %%mm2, %%mm3				\n\t"
+		"pmaxub %%mm0, %%mm2				\n\t" // max(L7, L8)
+		"pminub %%mm3, %%mm0				\n\t" // min(L7, L8)
+		"pmaxub %%mm1, %%mm0				\n\t" // max(L6, min(L7, L8))
+		"pminub %%mm0, %%mm2				\n\t" // median(L6, L7, L8)
+		"movq %%mm2, (%%ebx, %1, 2)			\n\t" // -> line 7
+		: : "r" (src), "r" (stride)
+		: "%eax", "%ebx"
+	);
+#else
+	int x, y;
+	// plain C version: walk over the 8 columns of the block
+	for(x=0; x<8; x++)
+	{
+		// only the odd lines are written, so the even lines read here are always unmodified
+		for(y=1; y<8; y+=2)
+		{
+			const int above= src[stride*(y-1)];
+			const int cur  = src[stride* y   ];
+			const int below= src[stride*(y+1)]; // line 8 for y=7 (below the 8x8 block)
+			const int lo   = above < cur ? above : cur;
+			const int hi   = above < cur ? cur : above;
+			const int mid  = lo > below ? lo : below;
+			// median(a,b,c) = min(max(a,b), max(min(a,b), c)), like the MMX2 code
+			src[stride*y]= hi < mid ? hi : mid;
+		}
+		src++;
+	}
+#endif
+}
+/**
+* Deinterlaces the given block with a vertical 3-tap median filter
+* (variant for the last row of blocks):
+* lines 1, 3 and 5 are overwritten with the bytewise median of themselves and the lines directly
+* above and below them, line 7 has no line below it inside the block and is replaced by a copy
+* of line 6, the even lines are left untouched
+* will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
+* src points to the first pixel of the block, stride is the offset between two consecutive lines
+* the C version computes the same result as the MMX2 code (it used to be a copy of the
+* linear blend filter, which shifted the image up by 1 line)
+*/
+static inline void deInterlaceMedianLastRow(uint8_t src[], int stride)
+{
+#if defined (HAVE_MMX2)
+	asm volatile(
+		"leal (%0, %1), %%eax				\n\t" // eax = address of line 1
+		"leal (%%eax, %1, 4), %%ebx			\n\t" // ebx = address of line 5
+//	0	1	2	3	4	5	6	7	8	9
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1
+		"movq (%0), %%mm0				\n\t" // L0
+		"movq (%%eax, %1), %%mm2			\n\t" // L2
+		"movq (%%eax), %%mm1				\n\t" // L1
+		"movq %%mm0, %%mm3				\n\t"
+		"pmaxub %%mm1, %%mm0				\n\t" // max(L0, L1)
+		"pminub %%mm3, %%mm1				\n\t" // min(L0, L1)
+		"pmaxub %%mm2, %%mm1				\n\t" // max(L2, min(L0, L1))
+		"pminub %%mm1, %%mm0				\n\t" // median(L0, L1, L2)
+		"movq %%mm0, (%%eax)				\n\t" // -> line 1
+		"movq (%0, %1, 4), %%mm0			\n\t" // L4
+		"movq (%%eax, %1, 2), %%mm1			\n\t" // L3
+		"movq %%mm2, %%mm3				\n\t"
+		"pmaxub %%mm1, %%mm2				\n\t" // max(L2, L3)
+		"pminub %%mm3, %%mm1				\n\t" // min(L2, L3)
+		"pmaxub %%mm0, %%mm1				\n\t" // max(L4, min(L2, L3))
+		"pminub %%mm1, %%mm2				\n\t" // median(L2, L3, L4)
+		"movq %%mm2, (%%eax, %1, 2)			\n\t" // -> line 3
+		"movq (%%ebx), %%mm2				\n\t" // L5
+		"movq (%%ebx, %1), %%mm1			\n\t" // L6
+		"movq %%mm2, %%mm3				\n\t"
+		"pmaxub %%mm0, %%mm2				\n\t" // max(L4, L5)
+		"pminub %%mm3, %%mm0				\n\t" // min(L4, L5)
+		"pmaxub %%mm1, %%mm0				\n\t" // max(L6, min(L4, L5))
+		"pminub %%mm0, %%mm2				\n\t" // median(L4, L5, L6)
+		"movq %%mm2, (%%ebx)				\n\t" // -> line 5
+		"movq %%mm1, (%%ebx, %1, 2)			\n\t" // line 7 <- copy of L6 (line 8 is not touched)
+		: : "r" (src), "r" (stride)
+		: "%eax", "%ebx"
+	);
+#else
+	int x, y;
+	// plain C version: walk over the 8 columns of the block
+	for(x=0; x<8; x++)
+	{
+		// only the odd lines are written, so the even lines read here are always unmodified
+		for(y=1; y<6; y+=2)
+		{
+			const int above= src[stride*(y-1)];
+			const int cur  = src[stride* y   ];
+			const int below= src[stride*(y+1)];
+			const int lo   = above < cur ? above : cur;
+			const int hi   = above < cur ? cur : above;
+			const int mid  = lo > below ? lo : below;
+			// median(a,b,c) = min(max(a,b), max(min(a,b), c)), like the MMX2 code
+			src[stride*y]= hi < mid ? hi : mid;
+		}
+		// no line 8 available for the last row -> duplicate line 6 into line 7
+		src[stride*7]= src[stride*6];
+		src++;
+	}
+#endif
+}
 #ifdef HAVE_ODIVX_POSTPROCESS
 #include "../opendivx/postprocess.h"
 int use_old_pp=0;
 #endif
 /**
 * ...
 * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63)
 * -63 is best quality -1 is worst
 */
-//extern "C"{
 void  postprocess(unsigned char * src[], int src_stride,
 unsigned char * dst[], int dst_stride,
 int horizontal_size,   int vertical_size,
 QP_STORE_T *QP_store,  int QP_stride,
 					  int mode)
 				if(!isColor) yHistogram[ srcBlock[0] ]++;
 				blockCopy(vertBlock + dstStride*2, dstStride,
 					vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX);
+				if(mode & LINEAR_IPOL_DEINT_FILTER)
+					deInterlaceInterpolateLinear(dstBlock, dstStride);
+				else if(mode & LINEAR_BLEND_DEINT_FILTER)
+					deInterlaceBlendLinear(dstBlock, dstStride);
+				else if(mode & MEDIAN_DEINT_FILTER)
+					deInterlaceMedian(dstBlock, dstStride);
+/*				else if(mode & CUBIC_IPOL_DEINT_FILTER)
+					deInterlaceInterpolateCubic(dstBlock, dstStride);
+				else if(mode & CUBIC_BLEND_DEINT_FILTER)
+					deInterlaceBlendCubic(dstBlock, dstStride);
+*/
 #ifdef MORE_TIMEING
 				T1= rdtsc();
 				memcpyTime+= T1-T0;
 				T0=T1;
 				vertTime+= T1-T0;
 				T0=T1;
 #endif
 			}
 			else
+			{
 				blockCopy(vertBlock + dstStride*1, dstStride,
 					vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX);
+				if(mode & LINEAR_IPOL_DEINT_FILTER)
+					deInterlaceInterpolateLinearLastRow(dstBlock, dstStride);
+				else if(mode & LINEAR_BLEND_DEINT_FILTER)
+					deInterlaceBlendLinearLastRow(dstBlock, dstStride);
+				else if(mode & MEDIAN_DEINT_FILTER)
+					deInterlaceMedianLastRow(dstBlock, dstStride);
+/*				else if(mode & CUBIC_IPOL_DEINT_FILTER)
+					deInterlaceInterpolateCubicLastRow(dstBlock, dstStride);
+				else if(mode & CUBIC_BLEND_DEINT_FILTER)
+					deInterlaceBlendCubicLastRow(dstBlock, dstStride);
+*/
+			}
 			if(x - 8 >= 0 && x<width)
 			{
 #ifdef MORE_TIMEING
 				T0= rdtsc();

Mercurial > libavcodec.hg

comparison libpostproc/postprocess_template.c @ 106:389391a6d0bf libavcodec