diff libswscale/swscale.c @ 19172:bae6c99a99cc

Vertical scaler with accurate rounding: some people on doom9 can see +-1 errors. The +-1 issue is limited to vertical filters with more than 2 taps, so bilinear upscaling was unaffected. The new code is sometimes faster and sometimes slower, but the difference is significant (~20%), so it is optional and enabled with arnd=1.
author michael
date Mon, 24 Jul 2006 10:36:06 +0000
parents 70ea0a8d3b4a
children e40cf0305d4e
line wrap: on
line diff
--- a/libswscale/swscale.c	Mon Jul 24 10:24:41 2006 +0000
+++ b/libswscale/swscale.c	Mon Jul 24 10:36:06 2006 +0000
@@ -848,7 +848,7 @@
 						dist-1.0);
 }
 
-static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
+static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
 			      int srcW, int dstW, int filterAlign, int one, int flags,
 			      SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
 {
@@ -1127,10 +1127,18 @@
             filterAlign = 1;
         }
 
+        if (flags & SWS_CPU_CAPS_MMX) {
+                // special case for unscaled vertical filtering
+                if(minFilterSize == 1 && filterAlign == 2)
+                        filterAlign= 1;
+        }
+
 	ASSERT(minFilterSize > 0)
 	filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
 	ASSERT(filterSize > 0)
 	filter= av_malloc(filterSize*dstW*sizeof(double));
+        if(filterSize >= MAX_FILTER_SIZE)
+                return -1;
 	*outFilterSize= filterSize;
 
 	if(flags&SWS_PRINT_INFO)
@@ -1216,6 +1224,7 @@
 	}
 
 	av_free(filter);
+        return 0;
 }
 
 #if defined(ARCH_X86) || defined(ARCH_X86_64)
@@ -2115,6 +2124,7 @@
 	/* precalculate vertical scaler filter coefficients */
 	{
 		const int filterAlign=
+		  (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
 		  (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
 		  1;