comparison postproc/swscale.c @ 17558:ad90899eeee6

AltiVec operations need memory aligned on 16-byte boundaries. Patch by Alan Curry (pacman at world dot std dot com).
author diego
date Wed, 08 Feb 2006 08:20:40 +0000
parents 04d6525d59d6
children f580a7755ac5
comparison
equal deleted inserted replaced
17557:3f863d1d8b43 17558:ad90899eeee6
1164 (*filterPos)[i]= srcW - filterSize; 1164 (*filterPos)[i]= srcW - filterSize;
1165 } 1165 }
1166 } 1166 }
1167 1167
1168 // Note the +1 is for the MMXscaler which reads over the end 1168 // Note the +1 is for the MMXscaler which reads over the end
1169 *outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t)); 1169 /* align at 16 for AltiVec (needed by hScale_altivec_real) */
1170 *outFilter= (int16_t*)memalign(16, *outFilterSize*(dstW+1)*sizeof(int16_t));
1170 memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t)); 1171 memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));
1171 1172
1172 /* Normalize & Store in outFilter */ 1173 /* Normalize & Store in outFilter */
1173 for(i=0; i<dstW; i++) 1174 for(i=0; i<dstW; i++)
1174 { 1175 {
2130 2131
2131 // allocate pixbufs (we use dynamic allocation because otherwise we would need to 2132 // allocate pixbufs (we use dynamic allocation because otherwise we would need to
2132 c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*)); 2133 c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
2133 c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*)); 2134 c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
2134 //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000) 2135 //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
2136 /* align at 16 bytes for AltiVec */
2135 for(i=0; i<c->vLumBufSize; i++) 2137 for(i=0; i<c->vLumBufSize; i++)
2136 c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000); 2138 c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(16, 4000);
2137 for(i=0; i<c->vChrBufSize; i++) 2139 for(i=0; i<c->vChrBufSize; i++)
2138 c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000); 2140 c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(16, 8000);
2139 2141
2140 //try to avoid drawing green stuff between the right end and the stride end 2142 //try to avoid drawing green stuff between the right end and the stride end
2141 for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000); 2143 for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
2142 for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000); 2144 for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);
2143 2145