Mercurial > libavcodec.hg
changeset 1012:7a5038ec769b libavcodec
sse16_c is badly broken (unaligned loads, LONG_MAX is undefined,
uint32_t array index -> segfault), so let's just use a plain,
unobfuscated version, which also happens to be faster for me.
author | mellum |
---|---|
date | Sun, 19 Jan 2003 12:06:36 +0000 |
parents | 3b7fcfb9c551 |
children | 5d4c95f323d0 |
files | dsputil.c |
diffstat | 1 files changed, 21 insertions(+), 49 deletions(-) [+] |
line wrap: on
line diff
--- a/dsputil.c Sat Jan 18 01:37:42 2003 +0000 +++ b/dsputil.c Sun Jan 19 12:06:36 2003 +0000 @@ -191,60 +191,32 @@ return s; } -static int sse16_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) +static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size) { - int s, i, j; - UINT32 *sq = squareTbl + 256; + int s, i; + uint32_t *sq = squareTbl + 256; s = 0; for (i = 0; i < 16; i++) { - for (j = 0; j < 16; j += 8) { -#if 1 -#if LONG_MAX > 2147483647 - uint64_t x,y; - x=*(uint64_t*)pix1; - y=*(uint64_t*)pix2; - - s += sq[(x&0xff) - (y&0xff)]; - s += sq[((x>>8)&0xff) - ((y>>8)&0xff)]; - s += sq[((x>>16)&0xff) - ((y>>16)&0xff)]; - s += sq[((x>>24)&0xff) - ((y>>24)&0xff)]; - s += sq[((x>>32)&0xff) - ((y>>32)&0xff)]; - s += sq[((x>>40)&0xff) - ((y>>40)&0xff)]; - s += sq[((x>>48)&0xff) - ((y>>48)&0xff)]; - s += sq[((x>>56)&0xff) - ((y>>56)&0xff)]; -#else - uint32_t x,y; - x=*(uint32_t*)pix1; - y=*(uint32_t*)pix2; + s += sq[pix1[ 0] - pix2[ 0]]; + s += sq[pix1[ 1] - pix2[ 1]]; + s += sq[pix1[ 2] - pix2[ 2]]; + s += sq[pix1[ 3] - pix2[ 3]]; + s += sq[pix1[ 4] - pix2[ 4]]; + s += sq[pix1[ 5] - pix2[ 5]]; + s += sq[pix1[ 6] - pix2[ 6]]; + s += sq[pix1[ 7] - pix2[ 7]]; + s += sq[pix1[ 8] - pix2[ 8]]; + s += sq[pix1[ 9] - pix2[ 9]]; + s += sq[pix1[10] - pix2[10]]; + s += sq[pix1[11] - pix2[11]]; + s += sq[pix1[12] - pix2[12]]; + s += sq[pix1[13] - pix2[13]]; + s += sq[pix1[14] - pix2[14]]; + s += sq[pix1[15] - pix2[15]]; - s += sq[(x&0xff) - (y&0xff)]; - s += sq[((x>>8)&0xff) - ((y>>8)&0xff)]; - s += sq[((x>>16)&0xff) - ((y>>16)&0xff)]; - s += sq[((x>>24)&0xff) - ((y>>24)&0xff)]; - - x=*(uint32_t*)(pix1+4); - y=*(uint32_t*)(pix2+4); - s += sq[(x&0xff) - (y&0xff)]; - s += sq[((x>>8)&0xff) - ((y>>8)&0xff)]; - s += sq[((x>>16)&0xff) - ((y>>16)&0xff)]; - s += sq[((x>>24)&0xff) - ((y>>24)&0xff)]; -#endif -#else - s += sq[pix1[0] - pix2[0]]; - s += sq[pix1[1] - pix2[1]]; - s += sq[pix1[2] - pix2[2]]; - s += sq[pix1[3] - pix2[3]]; - s += sq[pix1[4] - pix2[4]]; - s += 
sq[pix1[5] - pix2[5]]; - s += sq[pix1[6] - pix2[6]]; - s += sq[pix1[7] - pix2[7]]; -#endif - pix1 += 8; - pix2 += 8; - } - pix1 += line_size - 16; - pix2 += line_size - 16; + pix1 += line_size; + pix2 += line_size; } return s; }