comparison dsputil.c @ 1012:7a5038ec769b libavcodec

sse16_c is badly broken (unaligned loads, LONG_MAX is undefined, and the uint32 array index wraps around instead of going negative -> segv), so let's just use a nice, plain, unobfuscated version, which also happens to be faster for me.
author mellum
date Sun, 19 Jan 2003 12:06:36 +0000
parents 3b7fcfb9c551
children 5d4c95f323d0
comparison
equal deleted inserted replaced
1011:3b7fcfb9c551 1012:7a5038ec769b
189 pix2 += line_size; 189 pix2 += line_size;
190 } 190 }
191 return s; 191 return s;
192 } 192 }
193 193
194 static int sse16_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) 194 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
195 { 195 {
196 int s, i, j; 196 int s, i;
197 UINT32 *sq = squareTbl + 256; 197 uint32_t *sq = squareTbl + 256;
198 198
199 s = 0; 199 s = 0;
200 for (i = 0; i < 16; i++) { 200 for (i = 0; i < 16; i++) {
201 for (j = 0; j < 16; j += 8) { 201 s += sq[pix1[ 0] - pix2[ 0]];
202 #if 1 202 s += sq[pix1[ 1] - pix2[ 1]];
203 #if LONG_MAX > 2147483647 203 s += sq[pix1[ 2] - pix2[ 2]];
204 uint64_t x,y; 204 s += sq[pix1[ 3] - pix2[ 3]];
205 x=*(uint64_t*)pix1; 205 s += sq[pix1[ 4] - pix2[ 4]];
206 y=*(uint64_t*)pix2; 206 s += sq[pix1[ 5] - pix2[ 5]];
207 207 s += sq[pix1[ 6] - pix2[ 6]];
208 s += sq[(x&0xff) - (y&0xff)]; 208 s += sq[pix1[ 7] - pix2[ 7]];
209 s += sq[((x>>8)&0xff) - ((y>>8)&0xff)]; 209 s += sq[pix1[ 8] - pix2[ 8]];
210 s += sq[((x>>16)&0xff) - ((y>>16)&0xff)]; 210 s += sq[pix1[ 9] - pix2[ 9]];
211 s += sq[((x>>24)&0xff) - ((y>>24)&0xff)]; 211 s += sq[pix1[10] - pix2[10]];
212 s += sq[((x>>32)&0xff) - ((y>>32)&0xff)]; 212 s += sq[pix1[11] - pix2[11]];
213 s += sq[((x>>40)&0xff) - ((y>>40)&0xff)]; 213 s += sq[pix1[12] - pix2[12]];
214 s += sq[((x>>48)&0xff) - ((y>>48)&0xff)]; 214 s += sq[pix1[13] - pix2[13]];
215 s += sq[((x>>56)&0xff) - ((y>>56)&0xff)]; 215 s += sq[pix1[14] - pix2[14]];
216 #else 216 s += sq[pix1[15] - pix2[15]];
217 uint32_t x,y; 217
218 x=*(uint32_t*)pix1; 218 pix1 += line_size;
219 y=*(uint32_t*)pix2; 219 pix2 += line_size;
220
221 s += sq[(x&0xff) - (y&0xff)];
222 s += sq[((x>>8)&0xff) - ((y>>8)&0xff)];
223 s += sq[((x>>16)&0xff) - ((y>>16)&0xff)];
224 s += sq[((x>>24)&0xff) - ((y>>24)&0xff)];
225
226 x=*(uint32_t*)(pix1+4);
227 y=*(uint32_t*)(pix2+4);
228 s += sq[(x&0xff) - (y&0xff)];
229 s += sq[((x>>8)&0xff) - ((y>>8)&0xff)];
230 s += sq[((x>>16)&0xff) - ((y>>16)&0xff)];
231 s += sq[((x>>24)&0xff) - ((y>>24)&0xff)];
232 #endif
233 #else
234 s += sq[pix1[0] - pix2[0]];
235 s += sq[pix1[1] - pix2[1]];
236 s += sq[pix1[2] - pix2[2]];
237 s += sq[pix1[3] - pix2[3]];
238 s += sq[pix1[4] - pix2[4]];
239 s += sq[pix1[5] - pix2[5]];
240 s += sq[pix1[6] - pix2[6]];
241 s += sq[pix1[7] - pix2[7]];
242 #endif
243 pix1 += 8;
244 pix2 += 8;
245 }
246 pix1 += line_size - 16;
247 pix2 += line_size - 16;
248 } 220 }
249 return s; 221 return s;
250 } 222 }
251 223
252 static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) 224 static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)