changeset 7960:0a4ab841ae29

better deblocking filter
author michael
date Tue, 29 Oct 2002 18:35:15 +0000
parents 9b8dd240314d
children 78e0f2776da7
files libmpcodecs/vf_pp.c postproc/postprocess.c postproc/postprocess.h postproc/postprocess_template.c
diffstat 4 files changed, 104 insertions(+), 65 deletions(-) [+]
line wrap: on
line diff
--- a/libmpcodecs/vf_pp.c	Tue Oct 29 16:50:34 2002 +0000
+++ b/libmpcodecs/vf_pp.c	Tue Oct 29 18:35:15 2002 +0000
@@ -93,7 +93,8 @@
 		    vf->priv->dmpi->planes,vf->priv->dmpi->stride,
 		    (mpi->w+7)&(~7),mpi->h,
 		    mpi->qscale, mpi->qstride,
-		    &vf->priv->ppMode[ vf->priv->pp ], vf->priv->context);
+		    &vf->priv->ppMode[ vf->priv->pp ], vf->priv->context,
+		    mpi->pict_type);
     }
     
     return vf_next_put_image(vf,vf->priv->dmpi);
@@ -161,7 +162,7 @@
 	    ppMode.maxTmpNoise[2]= 3000;
 	    ppMode.maxAllowedY= 234;
 	    ppMode.minAllowedY= 16;
-	    ppMode.maxDcDiff= 1;
+	    ppMode.baseDcDiff= 256/4;
 	    ppMode.flatnessThreshold=40;
     
             vf->priv->ppMode[i]= ppMode;
--- a/postproc/postprocess.c	Tue Oct 29 16:50:34 2002 +0000
+++ b/postproc/postprocess.c	Tue Oct 29 18:35:15 2002 +0000
@@ -59,7 +59,6 @@
 split this huge file
 optimize c versions
 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
-put fastmemcpy back
 ...
 */
 
@@ -149,13 +148,14 @@
 
 	uint64_t __attribute__((aligned(8))) pQPb;
 	uint64_t __attribute__((aligned(8))) pQPb2;
+
+	uint64_t __attribute__((aligned(8))) mmxDcOffset[32];
+	uint64_t __attribute__((aligned(8))) mmxDcThreshold[32];
 	
-	uint64_t __attribute__((aligned(8))) mmxDcOffset;
-	uint64_t __attribute__((aligned(8))) mmxDcThreshold;
-
+	QP_STORE_T *nonBQPTable;
+	
 	int QP;
-	int dcOffset;
-	int dcThreshold;
+	int nonBQP;
 
 	int frameNum;
 
@@ -247,8 +247,8 @@
 {
 	int numEq= 0;
 	int y;
-	const int dcOffset= c->dcOffset;
-	const int dcThreshold= c->dcThreshold;
+	const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
+	const int dcThreshold= dcOffset*2 + 1;
 	for(y=0; y<BLOCK_SIZE; y++)
 	{
 		if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
@@ -269,8 +269,8 @@
 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
 	int numEq= 0;
 	int y;
-	const int dcOffset= c->dcOffset;
-	const int dcThreshold= c->dcThreshold;
+	const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
+	const int dcThreshold= dcOffset*2 + 1;
 	src+= stride*4; // src points to begin of the 8x8 Block
 	for(y=0; y<BLOCK_SIZE-1; y++)
 	{
@@ -725,7 +725,7 @@
 				else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
 				{
 					int o;
-					ppMode.maxDcDiff=1;
+					ppMode.baseDcDiff=256/4;
 //					hFlatnessThreshold= 40;
 //					vFlatnessThreshold= 40;
 
@@ -736,7 +736,7 @@
 						if(tail==options[o]) break;
 
 						numOfUnknownOptions--;
-						if(o==0) ppMode.maxDcDiff= val;
+						if(o==0) ppMode.baseDcDiff= val;
 						else ppMode.flatnessThreshold= val;
 					}
 				}
@@ -768,6 +768,8 @@
 void *getPPContext(int width, int height){
 	PPContext *c= memalign(32, sizeof(PPContext));
 	int i;
+	int mbWidth = (width+15)>>4;
+	int mbHeight= (height+15)>>4;
 
 	c->tempBlocks= (uint8_t*)memalign(8, 2*16*8);
 	c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t));
@@ -789,6 +791,8 @@
 	c->tempDstBlock= (uint8_t*)memalign(8, 1024*24);
 	c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
 	c->deintTemp= (uint8_t*)memalign(8, width+16);
+	c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T));
+	memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T));
 
 	c->frameNum=-1;
 
@@ -809,6 +813,7 @@
 	free(c->tempDstBlock);
 	free(c->tempSrcBlock);
 	free(c->deintTemp);
+	free(c->nonBQPTable);
 	
 	free(c);
 }
@@ -841,12 +846,14 @@
 
 void  postprocess(uint8_t * src[3], int srcStride[3],
                  uint8_t * dst[3], int dstStride[3],
-                 int horizontalSize, int verticalSize,
+                 int width, int height,
                  QP_STORE_T *QP_store,  int QPStride,
-		 PPMode *mode,  void *c)
+		 PPMode *mode,  void *vc, int pict_type)
 {
-
+	int mbWidth = (width+15)>>4;
+	int mbHeight= (height+15)>>4;
 	QP_STORE_T quantArray[2048/8];
+	PPContext *c = (PPContext*)vc;
 
 	if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
 	{
@@ -858,6 +865,29 @@
 		else
 			for(i=0; i<2048/8; i++) quantArray[i]= 1;
 	}
+if(0){
+int x,y;
+for(y=0; y<mbHeight; y++){
+	for(x=0; x<mbWidth; x++){
+		printf("%2d ", QP_store[x + y*QPStride]);
+	}
+	printf("\n");
+}
+	printf("\n");
+}
+//printf("pict_type:%d\n", pict_type);
+	if(pict_type!=3)
+	{
+		int x,y;
+		for(y=0; y<mbHeight; y++){
+			for(x=0; x<mbWidth; x++){
+				int qscale= QP_store[x + y*QPStride];
+				if(qscale&~31)
+				    qscale=31;
+				c->nonBQPTable[y*mbWidth + x]= qscale;
+			}
+		}
+	}
 
 	if(firstTime2 && verbose)
 	{
@@ -866,30 +896,30 @@
 	}
 
 	postProcess(src[0], srcStride[0], dst[0], dstStride[0],
-		horizontalSize, verticalSize, QP_store, QPStride, 0, mode, c);
+		width, height, QP_store, QPStride, 0, mode, c);
 
-	horizontalSize = (horizontalSize+1)>> 1;
-	verticalSize   = (verticalSize+1)>>1;
+	width  = (width +1)>>1;
+	height = (height+1)>>1;
 
 	if(mode->chromMode)
 	{
 		postProcess(src[1], srcStride[1], dst[1], dstStride[1],
-			horizontalSize, verticalSize, QP_store, QPStride, 1, mode, c);
+			width, height, QP_store, QPStride, 1, mode, c);
 		postProcess(src[2], srcStride[2], dst[2], dstStride[2],
-			horizontalSize, verticalSize, QP_store, QPStride, 2, mode, c);
+			width, height, QP_store, QPStride, 2, mode, c);
 	}
 	else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
 	{
-		memcpy(dst[1], src[1], srcStride[1]*verticalSize);
-		memcpy(dst[2], src[2], srcStride[2]*verticalSize);
+		memcpy(dst[1], src[1], srcStride[1]*height);
+		memcpy(dst[2], src[2], srcStride[2]*height);
 	}
 	else
 	{
 		int y;
-		for(y=0; y<verticalSize; y++)
+		for(y=0; y<height; y++)
 		{
-			memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), horizontalSize);
-			memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), horizontalSize);
+			memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
+			memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
 		}
 	}
 }
--- a/postproc/postprocess.h	Tue Oct 29 16:50:34 2002 +0000
+++ b/postproc/postprocess.h	Tue Oct 29 18:35:15 2002 +0000
@@ -54,7 +54,6 @@
 #define TEMP_NOISE_FILTER		0x100000
 #define FORCE_QUANT			0x200000
 
-
 #define GET_PP_QUALITY_MAX 6
 
 //use if u want a faster postprocessing code
@@ -76,8 +75,8 @@
 	int maxAllowedY; // for brihtness correction
 
 	int maxTmpNoise[3]; // for Temporal Noise Reducing filter (Maximal sum of abs differences)
-	
-	int maxDcDiff; // max abs diff between pixels to be considered flat
+
+	int baseDcDiff;
 	int flatnessThreshold;
 
 	int forcedQuant; // quantizer if FORCE_QUANT is used
@@ -87,7 +86,7 @@
                  uint8_t * dst[3], int dstStride[3],
                  int horizontalSize, int verticalSize,
                  QP_STORE_T *QP_store,  int QP_stride,
-		 PPMode *mode, void *ppContext);
+		 PPMode *mode, void *ppContext, int pict_type);
 
 // name is the stuff after "-pp" on the command line
 PPMode getPPModeByNameAndQuality(char *name, int quality);
--- a/postproc/postprocess_template.c	Tue Oct 29 16:50:34 2002 +0000
+++ b/postproc/postprocess_template.c	Tue Oct 29 18:35:15 2002 +0000
@@ -56,8 +56,9 @@
 		"leal (%1, %2), %%eax				\n\t"
 //	0	1	2	3	4	5	6	7	8	9
 //	%1	eax	eax+%2	eax+2%2	%1+4%2	ecx	ecx+%2	ecx+2%2	%1+8%2	ecx+4%2
-		"movq %3, %%mm7					\n\t" // mm7 = 0x7F
-		"movq %4, %%mm6					\n\t" // mm6 = 0x7D
+		"movq %3, %%mm7					\n\t" 
+		"movq %4, %%mm6					\n\t" 
+
 		"movq (%1), %%mm0				\n\t"
 		"movq (%%eax), %%mm1				\n\t"
 		"psubb %%mm1, %%mm0				\n\t" // mm0 = differnece
@@ -119,7 +120,7 @@
 #endif
 		"movd %%mm0, %0					\n\t"
 		: "=r" (numEq)
-		: "r" (src), "r" (stride), "m" (c->mmxDcOffset), "m" (c->mmxDcThreshold)
+		: "r" (src), "r" (stride), "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP])
 		: "%eax"
 		);
 	numEq= (-numEq) &0xFF;
@@ -150,6 +151,7 @@
 		);
 	return isOk==0;
 #else
+#if 1
 	int x;
 	const int QP= c->QP;
 	src+= stride*3;
@@ -159,6 +161,24 @@
 	}
 
 	return 1;
+#else
+	int x;
+	const int QP= c->QP;
+	src+= stride*4;
+	for(x=0; x<BLOCK_SIZE; x++)
+	{
+		int min=255;
+		int max=0;
+		int y;
+		for(y=0; y<8; y++){
+			int v= src[x + y*stride];
+			if(v>max) max=v;
+			if(v<min) min=v;
+		}
+		if(max-min > 2*QP) return 0;
+	}
+	return 1;
+#endif
 #endif
 }
 
@@ -2639,22 +2659,23 @@
 	int black=0, white=255; // blackest black and whitest white in the picture
 	int QPCorrecture= 256*256;
 
-	int copyAhead;
+	int copyAhead, i;
 
 	//FIXME remove
 	uint64_t * const yHistogram= c.yHistogram;
 	uint8_t * const tempSrc= c.tempSrc;
 	uint8_t * const tempDst= c.tempDst;
-
-	c.dcOffset= c.ppMode.maxDcDiff;
-	c.dcThreshold= c.ppMode.maxDcDiff*2 + 1;
+	const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
 
 #ifdef HAVE_MMX
-	c.mmxDcOffset= 0x7F - c.dcOffset;
-	c.mmxDcThreshold= 0x7F - c.dcThreshold;
-
-	c.mmxDcOffset*= 0x0101010101010101LL;
-	c.mmxDcThreshold*= 0x0101010101010101LL;
+	for(i=0; i<32; i++){
+		int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
+		int threshold= offset*2 + 1;
+		c.mmxDcOffset[i]= 0x7F - offset;
+		c.mmxDcThreshold[i]= 0x7F - threshold;
+		c.mmxDcOffset[i]*= 0x0101010101010101LL;
+		c.mmxDcThreshold[i]*= 0x0101010101010101LL;
+	}
 #endif
 
 	if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
@@ -2814,11 +2835,8 @@
 		uint8_t *tempBlock1= c.tempBlocks;
 		uint8_t *tempBlock2= c.tempBlocks + 8;
 #endif
-#ifdef ARCH_X86
 		int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride];
-		int QPDelta= isColor ? (-1) : 1<<31;
-		int QPFrac= 1<<30;
-#endif
+		int *nonBQPptr= isColor ? &c.nonBQPTable[(y>>3)*mbWidth] :&c.nonBQPTable[(y>>4)*mbWidth];
 		int QP=0;
 		/* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
 		   if not than use a temporary buffer */
@@ -2855,28 +2873,19 @@
 #ifdef HAVE_MMX
 			uint8_t *tmpXchg;
 #endif
-#ifdef ARCH_X86
-			QP= *QPptr;
-			asm volatile(
-				"addl %2, %1		\n\t"
-				"sbbl %%eax, %%eax	\n\t"
-				"shll $2, %%eax		\n\t"
-				"subl %%eax, %0		\n\t"
-				: "+r" (QPptr), "+m" (QPFrac)
-				: "r" (QPDelta)
-				: "%eax"
-			);
-#else
-			QP= isColor ?
-                                QPs[(y>>3)*QPStride + (x>>3)]:
-                                QPs[(y>>4)*QPStride + (x>>4)];
-#endif
-			if(!isColor)
+			if(isColor)
 			{
+				QP= QPptr[x>>3];
+				c.nonBQP= nonBQPptr[x>>3];
+			}
+			else
+			{
+				QP= QPptr[x>>4];
 				QP= (QP* QPCorrecture + 256*128)>>16;
+				c.nonBQP= nonBQPptr[x>>4];
+				c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
 				yHistogram[ srcBlock[srcStride*12 + 4] ]++;
 			}
-//printf("%d ", QP);
 			c.QP= QP;
 #ifdef HAVE_MMX
 			asm volatile(