changeset 17558:ad90899eeee6

AltiVec operations need to have memory aligned on 16-byte boundaries. Patch by Alan Curry, pacman at world dot std dot com.
author diego
date Wed, 08 Feb 2006 08:20:40 +0000
parents 3f863d1d8b43
children 4ae8c0169017
files postproc/swscale.c postproc/yuv2rgb_altivec.c
diffstat 2 files changed, 10 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/postproc/swscale.c	Wed Feb 08 08:16:53 2006 +0000
+++ b/postproc/swscale.c	Wed Feb 08 08:20:40 2006 +0000
@@ -1166,7 +1166,8 @@
 	}
 
 	// Note the +1 is for the MMXscaler which reads over the end
-	*outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
+	/* align at 16 for AltiVec (needed by hScale_altivec_real) */
+	*outFilter= (int16_t*)memalign(16, *outFilterSize*(dstW+1)*sizeof(int16_t));
 	memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));
 
 	/* Normalize & Store in outFilter */
@@ -2132,10 +2133,11 @@
 	c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
 	c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
 	//Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
+	/* align at 16 bytes for AltiVec */
 	for(i=0; i<c->vLumBufSize; i++)
-		c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000);
+		c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(16, 4000);
 	for(i=0; i<c->vChrBufSize; i++)
-		c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000);
+		c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(16, 8000);
 
 	//try to avoid drawing green stuff between the right end and the stride end
 	for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
--- a/postproc/yuv2rgb_altivec.c	Wed Feb 08 08:16:53 2006 +0000
+++ b/postproc/yuv2rgb_altivec.c	Wed Feb 08 08:20:40 2006 +0000
@@ -68,6 +68,9 @@
 #include <inttypes.h>
 #include <assert.h>
 #include "config.h"
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
 #include "rgb2rgb.h"
 #include "swscale.h"
 #include "swscale_internal.h"
@@ -788,8 +791,8 @@
 
   vector signed short *YCoeffs, *CCoeffs;
 
-  vYCoeffsBank = malloc (sizeof (vector signed short)*lumFilterSize*c->dstH);
-  vCCoeffsBank = malloc (sizeof (vector signed short)*chrFilterSize*c->dstH);
+  vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH);
+  vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH);
 
   for (i=0;i<lumFilterSize*c->dstH;i++) {
     tmp = c->vLumFilter[i];