# HG changeset patch # User lorenm # Date 1203580486 0 # Node ID 40fbc878ce3f4e265904814f2d85441607603d91 # Parent 0a403ade8c812d7b3ae4783bab91696eae13a839 pseudo-simd add_bytes and diff_bytes 2x faster than scalar in 32bit, 4x faster in 64bit (as opposed to 8x in mmx) diff -r 0a403ade8c81 -r 40fbc878ce3f dsputil.c --- a/dsputil.c Thu Feb 21 07:10:46 2008 +0000 +++ b/dsputil.c Thu Feb 21 07:54:46 2008 +0000 @@ -50,6 +50,9 @@ uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; uint32_t ff_squareTbl[512] = {0, }; +static const unsigned long pb_7f = 0x7f7f7f7f7f7f7f7fUL; +static const unsigned long pb_80 = 0x8080808080808080UL; + const uint8_t ff_zigzag_direct[64] = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, @@ -3276,34 +3279,31 @@ } static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ - int i; - for(i=0; i+7