Mercurial > libavcodec.hg
comparison dsputil.c @ 1012:7a5038ec769b libavcodec
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
uint32 array index -> segv), so let's just use a nice plain
unobfuscated version, which also happens to be faster for me.
author | mellum |
---|---|
date | Sun, 19 Jan 2003 12:06:36 +0000 |
parents | 3b7fcfb9c551 |
children | 5d4c95f323d0 |
comparison
equal
deleted
inserted
replaced
1011:3b7fcfb9c551 | 1012:7a5038ec769b |
---|---|
189 pix2 += line_size; | 189 pix2 += line_size; |
190 } | 190 } |
191 return s; | 191 return s; |
192 } | 192 } |
193 | 193 |
194 static int sse16_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) | 194 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size) |
195 { | 195 { |
196 int s, i, j; | 196 int s, i; |
197 UINT32 *sq = squareTbl + 256; | 197 uint32_t *sq = squareTbl + 256; |
198 | 198 |
199 s = 0; | 199 s = 0; |
200 for (i = 0; i < 16; i++) { | 200 for (i = 0; i < 16; i++) { |
201 for (j = 0; j < 16; j += 8) { | 201 s += sq[pix1[ 0] - pix2[ 0]]; |
202 #if 1 | 202 s += sq[pix1[ 1] - pix2[ 1]]; |
203 #if LONG_MAX > 2147483647 | 203 s += sq[pix1[ 2] - pix2[ 2]]; |
204 uint64_t x,y; | 204 s += sq[pix1[ 3] - pix2[ 3]]; |
205 x=*(uint64_t*)pix1; | 205 s += sq[pix1[ 4] - pix2[ 4]]; |
206 y=*(uint64_t*)pix2; | 206 s += sq[pix1[ 5] - pix2[ 5]]; |
207 | 207 s += sq[pix1[ 6] - pix2[ 6]]; |
208 s += sq[(x&0xff) - (y&0xff)]; | 208 s += sq[pix1[ 7] - pix2[ 7]]; |
209 s += sq[((x>>8)&0xff) - ((y>>8)&0xff)]; | 209 s += sq[pix1[ 8] - pix2[ 8]]; |
210 s += sq[((x>>16)&0xff) - ((y>>16)&0xff)]; | 210 s += sq[pix1[ 9] - pix2[ 9]]; |
211 s += sq[((x>>24)&0xff) - ((y>>24)&0xff)]; | 211 s += sq[pix1[10] - pix2[10]]; |
212 s += sq[((x>>32)&0xff) - ((y>>32)&0xff)]; | 212 s += sq[pix1[11] - pix2[11]]; |
213 s += sq[((x>>40)&0xff) - ((y>>40)&0xff)]; | 213 s += sq[pix1[12] - pix2[12]]; |
214 s += sq[((x>>48)&0xff) - ((y>>48)&0xff)]; | 214 s += sq[pix1[13] - pix2[13]]; |
215 s += sq[((x>>56)&0xff) - ((y>>56)&0xff)]; | 215 s += sq[pix1[14] - pix2[14]]; |
216 #else | 216 s += sq[pix1[15] - pix2[15]]; |
217 uint32_t x,y; | 217 |
218 x=*(uint32_t*)pix1; | 218 pix1 += line_size; |
219 y=*(uint32_t*)pix2; | 219 pix2 += line_size; |
220 | |
221 s += sq[(x&0xff) - (y&0xff)]; | |
222 s += sq[((x>>8)&0xff) - ((y>>8)&0xff)]; | |
223 s += sq[((x>>16)&0xff) - ((y>>16)&0xff)]; | |
224 s += sq[((x>>24)&0xff) - ((y>>24)&0xff)]; | |
225 | |
226 x=*(uint32_t*)(pix1+4); | |
227 y=*(uint32_t*)(pix2+4); | |
228 s += sq[(x&0xff) - (y&0xff)]; | |
229 s += sq[((x>>8)&0xff) - ((y>>8)&0xff)]; | |
230 s += sq[((x>>16)&0xff) - ((y>>16)&0xff)]; | |
231 s += sq[((x>>24)&0xff) - ((y>>24)&0xff)]; | |
232 #endif | |
233 #else | |
234 s += sq[pix1[0] - pix2[0]]; | |
235 s += sq[pix1[1] - pix2[1]]; | |
236 s += sq[pix1[2] - pix2[2]]; | |
237 s += sq[pix1[3] - pix2[3]]; | |
238 s += sq[pix1[4] - pix2[4]]; | |
239 s += sq[pix1[5] - pix2[5]]; | |
240 s += sq[pix1[6] - pix2[6]]; | |
241 s += sq[pix1[7] - pix2[7]]; | |
242 #endif | |
243 pix1 += 8; | |
244 pix2 += 8; | |
245 } | |
246 pix1 += line_size - 16; | |
247 pix2 += line_size - 16; | |
248 } | 220 } |
249 return s; | 221 return s; |
250 } | 222 } |
251 | 223 |
252 static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) | 224 static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) |