changeset 112:a2c063b6ecf9 libavcodec

fixed a bug in the tmp buffer; fixed the color range for yuv; fixed the width %8!=0 bug (another 1% speed loss)
author michael
date Fri, 19 Oct 2001 13:41:38 +0000
parents 8e4c5a16c9fc
children 3e0dcdb6b340
files libpostproc/postprocess.c libpostproc/postprocess.h libpostproc/postprocess_template.c
diffstat 3 files changed, 103 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/libpostproc/postprocess.c	Wed Oct 17 20:42:07 2001 +0000
+++ b/libpostproc/postprocess.c	Fri Oct 19 13:41:38 2001 +0000
@@ -122,7 +122,7 @@
 static uint64_t temp4=0;
 static uint64_t temp5=0;
 static uint64_t pQPb=0;
-static uint8_t tempBlock[16*16];
+static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data
 
 int hFlatnessThreshold= 56 - 16;
 int vFlatnessThreshold= 56 - 16;
@@ -132,7 +132,7 @@
 
 int maxAllowedY=255;
 //FIXME can never make a movie´s black brighter (anyone needs that?)
-int minAllowedY=0;
+int minAllowedY=16;
 
 #ifdef TIMING
 static inline long long rdtsc()
@@ -2398,6 +2398,13 @@
 	static uint8_t *tempDst= NULL;
 	static uint8_t *tempSrc= NULL;
 
+	/* Temporary buffers for handling the last block */
+	static uint8_t *tempDstBlock= NULL;
+	static uint8_t *tempSrcBlock= NULL;
+
+	uint8_t *dstBlockPtrBackup;
+	uint8_t *srcBlockPtrBackup;
+
 #ifdef TIMING
 	long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
 	sumTime= rdtsc();
@@ -2407,6 +2414,8 @@
 	{
 		tempDst= (uint8_t*)memalign(8, 1024*24);
 		tempSrc= (uint8_t*)memalign(8, 1024*24);
+		tempDstBlock= (uint8_t*)memalign(8, 1024*24);
+		tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
 	}
 
 	if(!yHistogram)
@@ -2414,6 +2423,12 @@
 		int i;
 		yHistogram= (uint64_t*)malloc(8*256);
 		for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;
+
+		if(mode & FULL_Y_RANGE)
+		{
+			maxAllowedY=255;
+			minAllowedY=0;
+		}
 	}
 
 	if(!isColor)
@@ -2505,6 +2520,7 @@
 			srcBlock= tempSrc;
 		}
 
+		// From this point on it is guaranteed that we can read and write 16 lines downward
 		// finish 1 block before the next otherwise we´ll might have a problem
 		// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
 		for(x=0; x<width; x+=BLOCK_SIZE)
@@ -2545,6 +2561,23 @@
 
 			if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
 
+			//can we mess with a 8x16 block, if not use a temp buffer, yes again
+			if(x+7 >= width)
+			{
+				int i;
+				dstBlockPtrBackup= dstBlock;
+				srcBlockPtrBackup= srcBlock;
+
+				for(i=0;i<BLOCK_SIZE*2; i++)
+				{
+					memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x);
+					memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x);
+				}
+
+				dstBlock= tempDstBlock;
+				srcBlock= tempSrcBlock;
+			}
+
 			blockCopy(dstBlock + dstStride*5, dstStride,
 				srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);
 
@@ -2593,7 +2626,7 @@
 			}
 
 			/* check if we have a previous block to deblock it with dstBlock */
-			if(x - 8 >= 0 && x<width)
+			if(x - 8 >= 0)
 			{
 #ifdef MORE_TIMING
 				T0= rdtsc();
@@ -2624,12 +2657,25 @@
 				dering(dstBlock - stride*9 + width-9, stride, QP);
 			//FIXME dering filter will not be applied to last block (bottom right)
 
+			/* did we use a tmp-block buffer */
+			if(x+7 >= width)
+			{
+				int i;
+				dstBlock= dstBlockPtrBackup;
+				srcBlock= srcBlockPtrBackup;
+
+				for(i=0;i<BLOCK_SIZE*2; i++)
+				{
+					memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x);
+				}
+			}
+
 			dstBlock+=8;
 			srcBlock+=8;
 		}
 
 		/* did we use a tmp buffer */
-		if(y+15 > height)
+		if(y+15 >= height)
 		{
 			uint8_t *dstBlock= &(dst[y*dstStride]);
 			memcpy(dstBlock, tempDst, dstStride*(height-y) );
--- a/libpostproc/postprocess.h	Wed Oct 17 20:42:07 2001 +0000
+++ b/libpostproc/postprocess.h	Fri Oct 19 13:41:38 2001 +0000
@@ -46,6 +46,9 @@
 #define H_RK1_FILTER	0x1000			// 4096 (not implemented yet)
 #define H_X1_FILTER	0x2000			// 8192
 
+// select between full y range (255-0) or the standard one (255-16)
+#define FULL_Y_RANGE	0x8000			// 32768
+
 //Deinterlacing Filters
 #define	LINEAR_IPOL_DEINT_FILTER	0x10000	// 65536
 #define	LINEAR_BLEND_DEINT_FILTER	0x20000	// 131072
--- a/libpostproc/postprocess_template.c	Wed Oct 17 20:42:07 2001 +0000
+++ b/libpostproc/postprocess_template.c	Fri Oct 19 13:41:38 2001 +0000
@@ -122,7 +122,7 @@
 static uint64_t temp4=0;
 static uint64_t temp5=0;
 static uint64_t pQPb=0;
-static uint8_t tempBlock[16*16];
+static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data
 
 int hFlatnessThreshold= 56 - 16;
 int vFlatnessThreshold= 56 - 16;
@@ -132,7 +132,7 @@
 
 int maxAllowedY=255;
 //FIXME can never make a movie´s black brighter (anyone needs that?)
-int minAllowedY=0;
+int minAllowedY=16;
 
 #ifdef TIMING
 static inline long long rdtsc()
@@ -2398,6 +2398,13 @@
 	static uint8_t *tempDst= NULL;
 	static uint8_t *tempSrc= NULL;
 
+	/* Temporary buffers for handling the last block */
+	static uint8_t *tempDstBlock= NULL;
+	static uint8_t *tempSrcBlock= NULL;
+
+	uint8_t *dstBlockPtrBackup;
+	uint8_t *srcBlockPtrBackup;
+
 #ifdef TIMING
 	long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
 	sumTime= rdtsc();
@@ -2407,6 +2414,8 @@
 	{
 		tempDst= (uint8_t*)memalign(8, 1024*24);
 		tempSrc= (uint8_t*)memalign(8, 1024*24);
+		tempDstBlock= (uint8_t*)memalign(8, 1024*24);
+		tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
 	}
 
 	if(!yHistogram)
@@ -2414,6 +2423,12 @@
 		int i;
 		yHistogram= (uint64_t*)malloc(8*256);
 		for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;
+
+		if(mode & FULL_Y_RANGE)
+		{
+			maxAllowedY=255;
+			minAllowedY=0;
+		}
 	}
 
 	if(!isColor)
@@ -2505,6 +2520,7 @@
 			srcBlock= tempSrc;
 		}
 
+		// From this point on it is guaranteed that we can read and write 16 lines downward
 		// finish 1 block before the next otherwise we´ll might have a problem
 		// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
 		for(x=0; x<width; x+=BLOCK_SIZE)
@@ -2545,6 +2561,23 @@
 
 			if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
 
+			//can we mess with a 8x16 block, if not use a temp buffer, yes again
+			if(x+7 >= width)
+			{
+				int i;
+				dstBlockPtrBackup= dstBlock;
+				srcBlockPtrBackup= srcBlock;
+
+				for(i=0;i<BLOCK_SIZE*2; i++)
+				{
+					memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x);
+					memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x);
+				}
+
+				dstBlock= tempDstBlock;
+				srcBlock= tempSrcBlock;
+			}
+
 			blockCopy(dstBlock + dstStride*5, dstStride,
 				srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);
 
@@ -2593,7 +2626,7 @@
 			}
 
 			/* check if we have a previous block to deblock it with dstBlock */
-			if(x - 8 >= 0 && x<width)
+			if(x - 8 >= 0)
 			{
 #ifdef MORE_TIMING
 				T0= rdtsc();
@@ -2624,12 +2657,25 @@
 				dering(dstBlock - stride*9 + width-9, stride, QP);
 			//FIXME dering filter will not be applied to last block (bottom right)
 
+			/* did we use a tmp-block buffer */
+			if(x+7 >= width)
+			{
+				int i;
+				dstBlock= dstBlockPtrBackup;
+				srcBlock= srcBlockPtrBackup;
+
+				for(i=0;i<BLOCK_SIZE*2; i++)
+				{
+					memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x);
+				}
+			}
+
 			dstBlock+=8;
 			srcBlock+=8;
 		}
 
 		/* did we use a tmp buffer */
-		if(y+15 > height)
+		if(y+15 >= height)
 		{
 			uint8_t *dstBlock= &(dst[y*dstStride]);
 			memcpy(dstBlock, tempDst, dstStride*(height-y) );