comparison dsputil.c @ 2967:ef2149182f1c libavcodec

COSMETICS: Remove all trailing whitespace.
author diego
date Sat, 17 Dec 2005 18:14:38 +0000
parents fd5d7c732c6b
children bfabfdf9ce55
comparison
equal deleted inserted replaced
2966:564788471dd4 2967:ef2149182f1c
17 * License along with this library; if not, write to the Free Software 17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 * 19 *
20 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> 20 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
21 */ 21 */
22 22
23 /** 23 /**
24 * @file dsputil.c 24 * @file dsputil.c
25 * DSP utils 25 * DSP utils
26 */ 26 */
27 27
28 #include "avcodec.h" 28 #include "avcodec.h"
29 #include "dsputil.h" 29 #include "dsputil.h"
30 #include "mpegvideo.h" 30 #include "mpegvideo.h"
31 #include "simple_idct.h" 31 #include "simple_idct.h"
32 #include "faandct.h" 32 #include "faandct.h"
63 63
64 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ 64 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
65 uint16_t __align8 inv_zigzag_direct16[64] = {0, }; 65 uint16_t __align8 inv_zigzag_direct16[64] = {0, };
66 66
67 const uint8_t ff_alternate_horizontal_scan[64] = { 67 const uint8_t ff_alternate_horizontal_scan[64] = {
68 0, 1, 2, 3, 8, 9, 16, 17, 68 0, 1, 2, 3, 8, 9, 16, 17,
69 10, 11, 4, 5, 6, 7, 15, 14, 69 10, 11, 4, 5, 6, 7, 15, 14,
70 13, 12, 19, 18, 24, 25, 32, 33, 70 13, 12, 19, 18, 24, 25, 32, 33,
71 26, 27, 20, 21, 22, 23, 28, 29, 71 26, 27, 20, 21, 22, 23, 28, 29,
72 30, 31, 34, 35, 40, 41, 48, 49, 72 30, 31, 34, 35, 40, 41, 48, 49,
73 42, 43, 36, 37, 38, 39, 44, 45, 73 42, 43, 36, 37, 38, 39, 44, 45,
74 46, 47, 50, 51, 56, 57, 58, 59, 74 46, 47, 50, 51, 56, 57, 58, 59,
75 52, 53, 54, 55, 60, 61, 62, 63, 75 52, 53, 54, 55, 60, 61, 62, 63,
76 }; 76 };
77 77
78 const uint8_t ff_alternate_vertical_scan[64] = { 78 const uint8_t ff_alternate_vertical_scan[64] = {
79 0, 8, 16, 24, 1, 9, 2, 10, 79 0, 8, 16, 24, 1, 9, 2, 10,
80 17, 25, 32, 40, 48, 56, 57, 49, 80 17, 25, 32, 40, 48, 56, 57, 49,
81 41, 33, 26, 18, 3, 11, 4, 12, 81 41, 33, 26, 18, 3, 11, 4, 12,
82 19, 27, 34, 42, 50, 58, 35, 43, 82 19, 27, 34, 42, 50, 58, 35, 43,
83 51, 59, 20, 28, 5, 13, 6, 14, 83 51, 59, 20, 28, 5, 13, 6, 14,
84 21, 29, 36, 44, 52, 60, 37, 45, 84 21, 29, 36, 44, 52, 60, 37, 45,
85 53, 61, 22, 30, 7, 15, 23, 31, 85 53, 61, 22, 30, 7, 15, 23, 31,
86 38, 46, 54, 62, 39, 47, 55, 63, 86 38, 46, 54, 62, 39, 47, 55, 63,
87 }; 87 };
88 88
89 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ 89 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
90 const uint32_t inverse[256]={ 90 const uint32_t inverse[256]={
91 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, 91 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
92 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, 92 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
93 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, 93 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
94 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, 94 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
95 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, 95 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
96 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, 96 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
97 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, 97 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
98 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, 98 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
99 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, 99 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
100 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, 100 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
101 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, 101 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
102 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, 102 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
103 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, 103 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
104 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, 104 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
105 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, 105 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
106 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, 106 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
107 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, 107 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
108 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, 108 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
109 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, 109 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
110 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, 110 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
111 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, 111 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
112 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, 112 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
113 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, 113 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
114 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, 114 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
115 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, 115 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
116 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, 116 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
117 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, 117 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
118 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, 118 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
119 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, 119 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
120 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, 120 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
121 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, 121 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
122 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, 122 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
123 }; 123 };
124 124
125 /* Input permutation for the simple_idct_mmx */ 125 /* Input permutation for the simple_idct_mmx */
126 static const uint8_t simple_mmx_permutation[64]={ 126 static const uint8_t simple_mmx_permutation[64]={
127 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 127 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
128 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 128 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
129 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 129 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
130 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 130 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
131 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 131 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
132 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 132 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
133 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 133 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
134 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, 134 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
135 }; 135 };
136 136
137 static int pix_sum_c(uint8_t * pix, int line_size) 137 static int pix_sum_c(uint8_t * pix, int line_size)
138 { 138 {
204 return s; 204 return s;
205 } 205 }
206 206
207 static void bswap_buf(uint32_t *dst, uint32_t *src, int w){ 207 static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
208 int i; 208 int i;
209 209
210 for(i=0; i+8<=w; i+=8){ 210 for(i=0; i+8<=w; i+=8){
211 dst[i+0]= bswap_32(src[i+0]); 211 dst[i+0]= bswap_32(src[i+0]);
212 dst[i+1]= bswap_32(src[i+1]); 212 dst[i+1]= bswap_32(src[i+1]);
213 dst[i+2]= bswap_32(src[i+2]); 213 dst[i+2]= bswap_32(src[i+2]);
214 dst[i+3]= bswap_32(src[i+3]); 214 dst[i+3]= bswap_32(src[i+3]);
296 int s, i, j; 296 int s, i, j;
297 const int dec_count= w==8 ? 3 : 4; 297 const int dec_count= w==8 ? 3 : 4;
298 int tmp[16*16]; 298 int tmp[16*16];
299 #if 0 299 #if 0
300 int level, ori; 300 int level, ori;
301 static const int scale[2][2][4][4]={ 301 static const int scale[2][2][4][4]={
302 { 302 {
303 { 303 {
304 //8x8 dec=3 304 //8x8 dec=3
305 {268, 239, 239, 213}, 305 {268, 239, 239, 213},
306 { 0, 224, 224, 152}, 306 { 0, 224, 224, 152},
348 for(ori= level ? 1 : 0; ori<4; ori++){ 348 for(ori= level ? 1 : 0; ori<4; ori++){
349 int sx= (ori&1) ? 1<<level: 0; 349 int sx= (ori&1) ? 1<<level: 0;
350 int stride= 16<<(dec_count-level); 350 int stride= 16<<(dec_count-level);
351 int sy= (ori&2) ? stride>>1 : 0; 351 int sy= (ori&2) ? stride>>1 : 0;
352 int size= 1<<level; 352 int size= 1<<level;
353 353
354 for(i=0; i<size; i++){ 354 for(i=0; i<size; i++){
355 for(j=0; j<size; j++){ 355 for(j=0; j<size; j++){
356 int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori]; 356 int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
357 s += ABS(v); 357 s += ABS(v);
358 } 358 }
366 s+= ABS(tmp[16*i+j+1]); 366 s+= ABS(tmp[16*i+j+1]);
367 s+= ABS(tmp[16*i+j+2]); 367 s+= ABS(tmp[16*i+j+2]);
368 s+= ABS(tmp[16*i+j+3]); 368 s+= ABS(tmp[16*i+j+3]);
369 } 369 }
370 } 370 }
371 assert(s>=0); 371 assert(s>=0);
372 372
373 return s>>2; 373 return s>>2;
374 #endif 374 #endif
375 } 375 }
376 376
377 static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ 377 static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
433 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, 433 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
434 int line_size) 434 int line_size)
435 { 435 {
436 int i; 436 int i;
437 uint8_t *cm = cropTbl + MAX_NEG_CROP; 437 uint8_t *cm = cropTbl + MAX_NEG_CROP;
438 438
439 /* read the pixels */ 439 /* read the pixels */
440 for(i=0;i<8;i++) { 440 for(i=0;i<8;i++) {
441 pixels[0] = cm[block[0]]; 441 pixels[0] = cm[block[0]];
442 pixels[1] = cm[block[1]]; 442 pixels[1] = cm[block[1]];
443 pixels[2] = cm[block[2]]; 443 pixels[2] = cm[block[2]];
455 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, 455 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
456 int line_size) 456 int line_size)
457 { 457 {
458 int i; 458 int i;
459 uint8_t *cm = cropTbl + MAX_NEG_CROP; 459 uint8_t *cm = cropTbl + MAX_NEG_CROP;
460 460
461 /* read the pixels */ 461 /* read the pixels */
462 for(i=0;i<4;i++) { 462 for(i=0;i<4;i++) {
463 pixels[0] = cm[block[0]]; 463 pixels[0] = cm[block[0]];
464 pixels[1] = cm[block[1]]; 464 pixels[1] = cm[block[1]];
465 pixels[2] = cm[block[2]]; 465 pixels[2] = cm[block[2]];
473 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, 473 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
474 int line_size) 474 int line_size)
475 { 475 {
476 int i; 476 int i;
477 uint8_t *cm = cropTbl + MAX_NEG_CROP; 477 uint8_t *cm = cropTbl + MAX_NEG_CROP;
478 478
479 /* read the pixels */ 479 /* read the pixels */
480 for(i=0;i<2;i++) { 480 for(i=0;i<2;i++) {
481 pixels[0] = cm[block[0]]; 481 pixels[0] = cm[block[0]];
482 pixels[1] = cm[block[1]]; 482 pixels[1] = cm[block[1]];
483 483
484 pixels += line_size; 484 pixels += line_size;
485 block += 8; 485 block += 8;
486 } 486 }
487 } 487 }
488 488
489 static void put_signed_pixels_clamped_c(const DCTELEM *block, 489 static void put_signed_pixels_clamped_c(const DCTELEM *block,
490 uint8_t *restrict pixels, 490 uint8_t *restrict pixels,
491 int line_size) 491 int line_size)
492 { 492 {
493 int i, j; 493 int i, j;
494 494
510 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, 510 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
511 int line_size) 511 int line_size)
512 { 512 {
513 int i; 513 int i;
514 uint8_t *cm = cropTbl + MAX_NEG_CROP; 514 uint8_t *cm = cropTbl + MAX_NEG_CROP;
515 515
516 /* read the pixels */ 516 /* read the pixels */
517 for(i=0;i<8;i++) { 517 for(i=0;i<8;i++) {
518 pixels[0] = cm[pixels[0] + block[0]]; 518 pixels[0] = cm[pixels[0] + block[0]];
519 pixels[1] = cm[pixels[1] + block[1]]; 519 pixels[1] = cm[pixels[1] + block[1]];
520 pixels[2] = cm[pixels[2] + block[2]]; 520 pixels[2] = cm[pixels[2] + block[2]];
531 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, 531 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
532 int line_size) 532 int line_size)
533 { 533 {
534 int i; 534 int i;
535 uint8_t *cm = cropTbl + MAX_NEG_CROP; 535 uint8_t *cm = cropTbl + MAX_NEG_CROP;
536 536
537 /* read the pixels */ 537 /* read the pixels */
538 for(i=0;i<4;i++) { 538 for(i=0;i<4;i++) {
539 pixels[0] = cm[pixels[0] + block[0]]; 539 pixels[0] = cm[pixels[0] + block[0]];
540 pixels[1] = cm[pixels[1] + block[1]]; 540 pixels[1] = cm[pixels[1] + block[1]];
541 pixels[2] = cm[pixels[2] + block[2]]; 541 pixels[2] = cm[pixels[2] + block[2]];
548 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, 548 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
549 int line_size) 549 int line_size)
550 { 550 {
551 int i; 551 int i;
552 uint8_t *cm = cropTbl + MAX_NEG_CROP; 552 uint8_t *cm = cropTbl + MAX_NEG_CROP;
553 553
554 /* read the pixels */ 554 /* read the pixels */
555 for(i=0;i<2;i++) { 555 for(i=0;i<2;i++) {
556 pixels[0] = cm[pixels[0] + block[0]]; 556 pixels[0] = cm[pixels[0] + block[0]];
557 pixels[1] = cm[pixels[1] + block[1]]; 557 pixels[1] = cm[pixels[1] + block[1]];
558 pixels += line_size; 558 pixels += line_size;
1141 dst+= stride; 1141 dst+= stride;
1142 src+= stride; 1142 src+= stride;
1143 } 1143 }
1144 } 1144 }
1145 1145
1146 static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, 1146 static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
1147 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height) 1147 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
1148 { 1148 {
1149 int y, vx, vy; 1149 int y, vx, vy;
1150 const int s= 1<<shift; 1150 const int s= 1<<shift;
1151 1151
1152 width--; 1152 width--;
1153 height--; 1153 height--;
1154 1154
1155 for(y=0; y<h; y++){ 1155 for(y=0; y<h; y++){
1156 int x; 1156 int x;
1164 src_y= vy>>16; 1164 src_y= vy>>16;
1165 frac_x= src_x&(s-1); 1165 frac_x= src_x&(s-1);
1166 frac_y= src_y&(s-1); 1166 frac_y= src_y&(s-1);
1167 src_x>>=shift; 1167 src_x>>=shift;
1168 src_y>>=shift; 1168 src_y>>=shift;
1169 1169
1170 if((unsigned)src_x < width){ 1170 if((unsigned)src_x < width){
1171 if((unsigned)src_y < height){ 1171 if((unsigned)src_y < height){
1172 index= src_x + src_y*stride; 1172 index= src_x + src_y*stride;
1173 dst[y*stride + x]= ( ( src[index ]*(s-frac_x) 1173 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
1174 + src[index +1]* frac_x )*(s-frac_y) 1174 + src[index +1]* frac_x )*(s-frac_y)
1175 + ( src[index+stride ]*(s-frac_x) 1175 + ( src[index+stride ]*(s-frac_x)
1176 + src[index+stride+1]* frac_x )* frac_y 1176 + src[index+stride+1]* frac_x )* frac_y
1177 + r)>>(shift*2); 1177 + r)>>(shift*2);
1178 }else{ 1178 }else{
1179 index= src_x + clip(src_y, 0, height)*stride; 1179 index= src_x + clip(src_y, 0, height)*stride;
1180 dst[y*stride + x]= ( ( src[index ]*(s-frac_x) 1180 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
1181 + src[index +1]* frac_x )*s 1181 + src[index +1]* frac_x )*s
1182 + r)>>(shift*2); 1182 + r)>>(shift*2);
1183 } 1183 }
1184 }else{ 1184 }else{
1185 if((unsigned)src_y < height){ 1185 if((unsigned)src_y < height){
1186 index= clip(src_x, 0, width) + src_y*stride; 1186 index= clip(src_x, 0, width) + src_y*stride;
1187 dst[y*stride + x]= ( ( src[index ]*(s-frac_y) 1187 dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
1188 + src[index+stride ]* frac_y )*s 1188 + src[index+stride ]* frac_y )*s
1189 + r)>>(shift*2); 1189 + r)>>(shift*2);
1190 }else{ 1190 }else{
1191 index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride; 1191 index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
1192 dst[y*stride + x]= src[index ]; 1192 dst[y*stride + x]= src[index ];
1193 } 1193 }
1194 } 1194 }
1195 1195
1196 vx+= dxx; 1196 vx+= dxx;
1197 vy+= dyx; 1197 vy+= dyx;
1198 } 1198 }
1199 ox += dxy; 1199 ox += dxy;
1200 oy += dyy; 1200 oy += dyy;
1229 } 1229 }
1230 src += stride; 1230 src += stride;
1231 dst += stride; 1231 dst += stride;
1232 } 1232 }
1233 } 1233 }
1234 1234
1235 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 1235 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1236 int i,j; 1236 int i,j;
1237 for (i=0; i < height; i++) { 1237 for (i=0; i < height; i++) {
1238 for (j=0; j < width; j++) { 1238 for (j=0; j < width; j++) {
1239 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11; 1239 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
1240 } 1240 }
1241 src += stride; 1241 src += stride;
1242 dst += stride; 1242 dst += stride;
1243 } 1243 }
1244 } 1244 }
1245 1245
1246 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 1246 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1247 int i,j; 1247 int i,j;
1248 for (i=0; i < height; i++) { 1248 for (i=0; i < height; i++) {
1249 for (j=0; j < width; j++) { 1249 for (j=0; j < width; j++) {
1250 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15; 1250 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
1326 } 1326 }
1327 src += stride; 1327 src += stride;
1328 dst += stride; 1328 dst += stride;
1329 } 1329 }
1330 } 1330 }
1331 1331
1332 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 1332 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1333 int i,j; 1333 int i,j;
1334 for (i=0; i < height; i++) { 1334 for (i=0; i < height; i++) {
1335 for (j=0; j < width; j++) { 1335 for (j=0; j < width; j++) {
1336 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1; 1336 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
1337 } 1337 }
1338 src += stride; 1338 src += stride;
1339 dst += stride; 1339 dst += stride;
1340 } 1340 }
1341 } 1341 }
1342 1342
1343 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 1343 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1344 int i,j; 1344 int i,j;
1345 for (i=0; i < height; i++) { 1345 for (i=0; i < height; i++) {
1346 for (j=0; j < width; j++) { 1346 for (j=0; j < width; j++) {
1347 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1; 1347 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
2494 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; 2494 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
2495 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; 2495 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2496 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; 2496 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2497 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; 2497 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
2498 dst+=dstStride; 2498 dst+=dstStride;
2499 src+=srcStride; 2499 src+=srcStride;
2500 } 2500 }
2501 } 2501 }
2502 2502
2503 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ 2503 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
2504 uint8_t *cm = cropTbl + MAX_NEG_CROP; 2504 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2578 } 2578 }
2579 2579
2580 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){ 2580 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
2581 int x; 2581 int x;
2582 const int strength= ff_h263_loop_filter_strength[qscale]; 2582 const int strength= ff_h263_loop_filter_strength[qscale];
2583 2583
2584 for(x=0; x<8; x++){ 2584 for(x=0; x<8; x++){
2585 int d1, d2, ad1; 2585 int d1, d2, ad1;
2586 int p0= src[x-2*stride]; 2586 int p0= src[x-2*stride];
2587 int p1= src[x-1*stride]; 2587 int p1= src[x-1*stride];
2588 int p2= src[x+0*stride]; 2588 int p2= src[x+0*stride];
2592 if (d<-2*strength) d1= 0; 2592 if (d<-2*strength) d1= 0;
2593 else if(d<- strength) d1=-2*strength - d; 2593 else if(d<- strength) d1=-2*strength - d;
2594 else if(d< strength) d1= d; 2594 else if(d< strength) d1= d;
2595 else if(d< 2*strength) d1= 2*strength - d; 2595 else if(d< 2*strength) d1= 2*strength - d;
2596 else d1= 0; 2596 else d1= 0;
2597 2597
2598 p1 += d1; 2598 p1 += d1;
2599 p2 -= d1; 2599 p2 -= d1;
2600 if(p1&256) p1= ~(p1>>31); 2600 if(p1&256) p1= ~(p1>>31);
2601 if(p2&256) p2= ~(p2>>31); 2601 if(p2&256) p2= ~(p2>>31);
2602 2602
2603 src[x-1*stride] = p1; 2603 src[x-1*stride] = p1;
2604 src[x+0*stride] = p2; 2604 src[x+0*stride] = p2;
2605 2605
2606 ad1= ABS(d1)>>1; 2606 ad1= ABS(d1)>>1;
2607 2607
2608 d2= clip((p0-p3)/4, -ad1, ad1); 2608 d2= clip((p0-p3)/4, -ad1, ad1);
2609 2609
2610 src[x-2*stride] = p0 - d2; 2610 src[x-2*stride] = p0 - d2;
2611 src[x+ stride] = p3 + d2; 2611 src[x+ stride] = p3 + d2;
2612 } 2612 }
2613 } 2613 }
2614 2614
2615 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){ 2615 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
2616 int y; 2616 int y;
2617 const int strength= ff_h263_loop_filter_strength[qscale]; 2617 const int strength= ff_h263_loop_filter_strength[qscale];
2618 2618
2619 for(y=0; y<8; y++){ 2619 for(y=0; y<8; y++){
2620 int d1, d2, ad1; 2620 int d1, d2, ad1;
2621 int p0= src[y*stride-2]; 2621 int p0= src[y*stride-2];
2622 int p1= src[y*stride-1]; 2622 int p1= src[y*stride-1];
2623 int p2= src[y*stride+0]; 2623 int p2= src[y*stride+0];
2627 if (d<-2*strength) d1= 0; 2627 if (d<-2*strength) d1= 0;
2628 else if(d<- strength) d1=-2*strength - d; 2628 else if(d<- strength) d1=-2*strength - d;
2629 else if(d< strength) d1= d; 2629 else if(d< strength) d1= d;
2630 else if(d< 2*strength) d1= 2*strength - d; 2630 else if(d< 2*strength) d1= 2*strength - d;
2631 else d1= 0; 2631 else d1= 0;
2632 2632
2633 p1 += d1; 2633 p1 += d1;
2634 p2 -= d1; 2634 p2 -= d1;
2635 if(p1&256) p1= ~(p1>>31); 2635 if(p1&256) p1= ~(p1>>31);
2636 if(p2&256) p2= ~(p2>>31); 2636 if(p2&256) p2= ~(p2>>31);
2637 2637
2638 src[y*stride-1] = p1; 2638 src[y*stride-1] = p1;
2639 src[y*stride+0] = p2; 2639 src[y*stride+0] = p2;
2640 2640
2641 ad1= ABS(d1)>>1; 2641 ad1= ABS(d1)>>1;
2642 2642
2643 d2= clip((p0-p3)/4, -ad1, ad1); 2643 d2= clip((p0-p3)/4, -ad1, ad1);
2644 2644
2645 src[y*stride-2] = p0 - d2; 2645 src[y*stride-2] = p0 - d2;
2646 src[y*stride+1] = p3 + d2; 2646 src[y*stride+1] = p3 + d2;
2647 } 2647 }
2648 } 2648 }
2649 2649
2660 xy = y * stride + x; 2660 xy = y * stride + x;
2661 yz = y * 8 + x; 2661 yz = y * 8 + x;
2662 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride]; 2662 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
2663 } 2663 }
2664 } 2664 }
2665 2665
2666 for(y=0; y<8; y++){ 2666 for(y=0; y<8; y++){
2667 src[ y*stride] = (temp[ y*8] + 2)>>2; 2667 src[ y*stride] = (temp[ y*8] + 2)>>2;
2668 src[7+y*stride] = (temp[7+y*8] + 2)>>2; 2668 src[7+y*stride] = (temp[7+y*8] + 2)>>2;
2669 for(x=1; x<7; x++){ 2669 for(x=1; x<7; x++){
2670 xy = y * stride + x; 2670 xy = y * stride + x;
2687 const int p1 = pix[-2*xstride]; 2687 const int p1 = pix[-2*xstride];
2688 const int p2 = pix[-3*xstride]; 2688 const int p2 = pix[-3*xstride];
2689 const int q0 = pix[0]; 2689 const int q0 = pix[0];
2690 const int q1 = pix[1*xstride]; 2690 const int q1 = pix[1*xstride];
2691 const int q2 = pix[2*xstride]; 2691 const int q2 = pix[2*xstride];
2692 2692
2693 if( ABS( p0 - q0 ) < alpha && 2693 if( ABS( p0 - q0 ) < alpha &&
2694 ABS( p1 - p0 ) < beta && 2694 ABS( p1 - p0 ) < beta &&
2695 ABS( q1 - q0 ) < beta ) { 2695 ABS( q1 - q0 ) < beta ) {
2696 2696
2697 int tc = tc0[i]; 2697 int tc = tc0[i];
2698 int i_delta; 2698 int i_delta;
2699 2699
2700 if( ABS( p2 - p0 ) < beta ) { 2700 if( ABS( p2 - p0 ) < beta ) {
2701 pix[-2*xstride] = p1 + clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] ); 2701 pix[-2*xstride] = p1 + clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
2702 tc++; 2702 tc++;
2703 } 2703 }
2704 if( ABS( q2 - q0 ) < beta ) { 2704 if( ABS( q2 - q0 ) < beta ) {
2705 pix[ xstride] = q1 + clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] ); 2705 pix[ xstride] = q1 + clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
2706 tc++; 2706 tc++;
2707 } 2707 }
2708 2708
2709 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); 2709 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
2710 pix[-xstride] = clip_uint8( p0 + i_delta ); /* p0' */ 2710 pix[-xstride] = clip_uint8( p0 + i_delta ); /* p0' */
2711 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ 2711 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
2712 } 2712 }
2713 pix += ystride; 2713 pix += ystride;
3017 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ 3017 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
3018 MpegEncContext *c = v; 3018 MpegEncContext *c = v;
3019 int score1=0; 3019 int score1=0;
3020 int score2=0; 3020 int score2=0;
3021 int x,y; 3021 int x,y;
3022 3022
3023 for(y=0; y<h; y++){ 3023 for(y=0; y<h; y++){
3024 for(x=0; x<8; x++){ 3024 for(x=0; x<8; x++){
3025 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); 3025 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
3026 } 3026 }
3027 if(y+1<h){ 3027 if(y+1<h){
3033 } 3033 }
3034 } 3034 }
3035 s1+= stride; 3035 s1+= stride;
3036 s2+= stride; 3036 s2+= stride;
3037 } 3037 }
3038 3038
3039 if(c) return score1 + ABS(score2)*c->avctx->nsse_weight; 3039 if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
3040 else return score1 + ABS(score2)*8; 3040 else return score1 + ABS(score2)*8;
3041 } 3041 }
3042 3042
3043 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ 3043 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
3058 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){ 3058 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
3059 int i; 3059 int i;
3060 3060
3061 for(i=0; i<8*8; i++){ 3061 for(i=0; i<8*8; i++){
3062 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); 3062 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
3063 } 3063 }
3064 } 3064 }
3065 3065
3066 /** 3066 /**
3067 * permutes an 8x8 block. 3067 * permutes an 8x8 block.
3068 * @param block the block which will be permuted according to the given permutation vector 3068 * @param block the block which will be permuted according to the given permutation vector
3069 * @param permutation the permutation vector 3069 * @param permutation the permutation vector
3070 * @param last the last non zero coefficient in scantable order, used to speed the permutation up 3070 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
3071 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not 3071 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
3072 * (inverse) permutated to scantable order! 3072 * (inverse) permutated to scantable order!
3073 */ 3073 */
3074 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) 3074 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
3075 { 3075 {
3076 int i; 3076 int i;
3077 DCTELEM temp[64]; 3077 DCTELEM temp[64];
3078 3078
3079 if(last<=0) return; 3079 if(last<=0) return;
3080 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms 3080 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
3081 3081
3082 for(i=0; i<=last; i++){ 3082 for(i=0; i<=last; i++){
3083 const int j= scantable[i]; 3083 const int j= scantable[i];
3084 temp[j]= block[j]; 3084 temp[j]= block[j];
3085 block[j]=0; 3085 block[j]=0;
3086 } 3086 }
3087 3087
3088 for(i=0; i<=last; i++){ 3088 for(i=0; i<=last; i++){
3089 const int j= scantable[i]; 3089 const int j= scantable[i];
3090 const int perm_j= permutation[j]; 3090 const int perm_j= permutation[j];
3091 block[perm_j]= temp[j]; 3091 block[perm_j]= temp[j];
3092 } 3092 }
3096 return 0; 3096 return 0;
3097 } 3097 }
3098 3098
3099 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ 3099 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
3100 int i; 3100 int i;
3101 3101
3102 memset(cmp, 0, sizeof(void*)*5); 3102 memset(cmp, 0, sizeof(void*)*5);
3103 3103
3104 for(i=0; i<5; i++){ 3104 for(i=0; i<5; i++){
3105 switch(type&0xFF){ 3105 switch(type&0xFF){
3106 case FF_CMP_SAD: 3106 case FF_CMP_SAD:
3107 cmp[i]= c->sad[i]; 3107 cmp[i]= c->sad[i];
3108 break; 3108 break;
3201 for(i=0; i<w; i++){ 3201 for(i=0; i<w; i++){
3202 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF); 3202 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
3203 lt= src1[i]; 3203 lt= src1[i];
3204 l= src2[i]; 3204 l= src2[i];
3205 dst[i]= l - pred; 3205 dst[i]= l - pred;
3206 } 3206 }
3207 3207
3208 *left= l; 3208 *left= l;
3209 *left_top= lt; 3209 *left_top= lt;
3210 } 3210 }
3211 3211
3226 3226
3227 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ 3227 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
3228 int i; 3228 int i;
3229 int temp[64]; 3229 int temp[64];
3230 int sum=0; 3230 int sum=0;
3231 3231
3232 assert(h==8); 3232 assert(h==8);
3233 3233
3234 for(i=0; i<8; i++){ 3234 for(i=0; i<8; i++){
3235 //FIXME try pointer walks 3235 //FIXME try pointer walks
3236 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]); 3236 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
3237 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]); 3237 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
3238 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]); 3238 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
3239 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]); 3239 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
3240 3240
3241 BUTTERFLY1(temp[8*i+0], temp[8*i+2]); 3241 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
3242 BUTTERFLY1(temp[8*i+1], temp[8*i+3]); 3242 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
3243 BUTTERFLY1(temp[8*i+4], temp[8*i+6]); 3243 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
3244 BUTTERFLY1(temp[8*i+5], temp[8*i+7]); 3244 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
3245 3245
3246 BUTTERFLY1(temp[8*i+0], temp[8*i+4]); 3246 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
3247 BUTTERFLY1(temp[8*i+1], temp[8*i+5]); 3247 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
3248 BUTTERFLY1(temp[8*i+2], temp[8*i+6]); 3248 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
3249 BUTTERFLY1(temp[8*i+3], temp[8*i+7]); 3249 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
3250 } 3250 }
3252 for(i=0; i<8; i++){ 3252 for(i=0; i<8; i++){
3253 BUTTERFLY1(temp[8*0+i], temp[8*1+i]); 3253 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
3254 BUTTERFLY1(temp[8*2+i], temp[8*3+i]); 3254 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
3255 BUTTERFLY1(temp[8*4+i], temp[8*5+i]); 3255 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
3256 BUTTERFLY1(temp[8*6+i], temp[8*7+i]); 3256 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
3257 3257
3258 BUTTERFLY1(temp[8*0+i], temp[8*2+i]); 3258 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
3259 BUTTERFLY1(temp[8*1+i], temp[8*3+i]); 3259 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
3260 BUTTERFLY1(temp[8*4+i], temp[8*6+i]); 3260 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
3261 BUTTERFLY1(temp[8*5+i], temp[8*7+i]); 3261 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
3262 3262
3263 sum += 3263 sum +=
3264 BUTTERFLYA(temp[8*0+i], temp[8*4+i]) 3264 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
3265 +BUTTERFLYA(temp[8*1+i], temp[8*5+i]) 3265 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
3266 +BUTTERFLYA(temp[8*2+i], temp[8*6+i]) 3266 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
3267 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]); 3267 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
3268 } 3268 }
3278 3278
/**
 * Intra 8x8 score built from two passes of butterfly macros over the source
 * pixels themselves (no reference block is subtracted).
 *
 * The first pass works along each of the 8 rows of src and stores the
 * results in temp[]; the second pass works down each of the 8 columns of
 * temp[] and accumulates the BUTTERFLYA() values of the last stage.
 * Finally ABS(temp[8*0] + temp[8*4]) is subtracted from the total.
 *
 * NOTE(review): BUTTERFLY1/BUTTERFLY2/BUTTERFLYA and ABS are defined outside
 * this block; presumably they are sum/difference butterflies and an
 * absolute-value helper -- confirm against their definitions.
 *
 * @param s      unused here
 * @param src    top-left pixel of the 8x8 block
 * @param dummy  unused here
 * @param stride distance in bytes between two consecutive rows of src
 * @param h      block height, asserted to be 8
 * @return the accumulated score
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int sum= 0;
    int i;

    assert(h==8);

    /* row pass: row points at the 8 outputs, px at the 8 input pixels */
    for(i=0; i<8; i++){
        int * const row= temp + 8*i;
        const uint8_t * const px= src + stride*i;

        //FIXME try pointer walks
        BUTTERFLY2(row[0], row[1], px[0], px[1]);
        BUTTERFLY2(row[2], row[3], px[2], px[3]);
        BUTTERFLY2(row[4], row[5], px[4], px[5]);
        BUTTERFLY2(row[6], row[7], px[6], px[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* column pass: col[8*k] is the element of row k in column i */
    for(i=0; i<8; i++){
        int * const col= temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        /* the last stage is not stored, only its magnitudes are summed */
        sum += BUTTERFLYA(col[8*0], col[8*4])
              +BUTTERFLYA(col[8*1], col[8*5])
              +BUTTERFLYA(col[8*2], col[8*6])
              +BUTTERFLYA(col[8*3], col[8*7]);
    }

    sum -= ABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
3326 3326
3327 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 3327 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3328 MpegEncContext * const s= (MpegEncContext *)c; 3328 MpegEncContext * const s= (MpegEncContext *)c;
3329 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; 3329 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
3330 DCTELEM * const temp= (DCTELEM*)aligned_temp; 3330 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3331 int sum=0, i; 3331 int sum=0, i;
3332 3332
3333 assert(h==8); 3333 assert(h==8);
3334 3334
3335 s->dsp.diff_pixels(temp, src1, src2, stride); 3335 s->dsp.diff_pixels(temp, src1, src2, stride);
3336 s->dsp.fdct(temp); 3336 s->dsp.fdct(temp);
3337 3337
3338 for(i=0; i<64; i++) 3338 for(i=0; i<64; i++)
3339 sum+= ABS(temp[i]); 3339 sum+= ABS(temp[i]);
3340 3340
3341 return sum; 3341 return sum;
3342 } 3342 }
3343 3343
3344 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 3344 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3345 MpegEncContext * const s= (MpegEncContext *)c; 3345 MpegEncContext * const s= (MpegEncContext *)c;
3346 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; 3346 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
3347 DCTELEM * const temp= (DCTELEM*)aligned_temp; 3347 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3348 int sum=0, i; 3348 int sum=0, i;
3349 3349
3350 assert(h==8); 3350 assert(h==8);
3351 3351
3352 s->dsp.diff_pixels(temp, src1, src2, stride); 3352 s->dsp.diff_pixels(temp, src1, src2, stride);
3353 s->dsp.fdct(temp); 3353 s->dsp.fdct(temp);
3354 3354
3355 for(i=0; i<64; i++) 3355 for(i=0; i<64; i++)
3356 sum= FFMAX(sum, ABS(temp[i])); 3356 sum= FFMAX(sum, ABS(temp[i]));
3357 3357
3358 return sum; 3358 return sum;
3359 } 3359 }
3360 3360
3361 void simple_idct(DCTELEM *block); //FIXME 3361 void simple_idct(DCTELEM *block); //FIXME
3362 3362
3367 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; 3367 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
3368 int sum=0, i; 3368 int sum=0, i;
3369 3369
3370 assert(h==8); 3370 assert(h==8);
3371 s->mb_intra=0; 3371 s->mb_intra=0;
3372 3372
3373 s->dsp.diff_pixels(temp, src1, src2, stride); 3373 s->dsp.diff_pixels(temp, src1, src2, stride);
3374 3374
3375 memcpy(bak, temp, 64*sizeof(DCTELEM)); 3375 memcpy(bak, temp, 64*sizeof(DCTELEM));
3376 3376
3377 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); 3377 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3378 s->dct_unquantize_inter(s, temp, 0, s->qscale); 3378 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3379 simple_idct(temp); //FIXME 3379 simple_idct(temp); //FIXME
3380 3380
3381 for(i=0; i<64; i++) 3381 for(i=0; i<64; i++)
3382 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); 3382 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
3383 3383
3384 return sum; 3384 return sum;
3385 } 3385 }
3386 3386
3387 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 3387 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3388 MpegEncContext * const s= (MpegEncContext *)c; 3388 MpegEncContext * const s= (MpegEncContext *)c;
3393 uint8_t * const bak= (uint8_t*)aligned_bak; 3393 uint8_t * const bak= (uint8_t*)aligned_bak;
3394 int i, last, run, bits, level, distoration, start_i; 3394 int i, last, run, bits, level, distoration, start_i;
3395 const int esc_length= s->ac_esc_length; 3395 const int esc_length= s->ac_esc_length;
3396 uint8_t * length; 3396 uint8_t * length;
3397 uint8_t * last_length; 3397 uint8_t * last_length;
3398 3398
3399 assert(h==8); 3399 assert(h==8);
3400 3400
3401 for(i=0; i<8; i++){ 3401 for(i=0; i<8; i++){
3402 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0]; 3402 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
3403 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1]; 3403 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
3406 s->dsp.diff_pixels(temp, src1, src2, stride); 3406 s->dsp.diff_pixels(temp, src1, src2, stride);
3407 3407
3408 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); 3408 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3409 3409
3410 bits=0; 3410 bits=0;
3411 3411
3412 if (s->mb_intra) { 3412 if (s->mb_intra) {
3413 start_i = 1; 3413 start_i = 1;
3414 length = s->intra_ac_vlc_length; 3414 length = s->intra_ac_vlc_length;
3415 last_length= s->intra_ac_vlc_last_length; 3415 last_length= s->intra_ac_vlc_last_length;
3416 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma 3416 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3417 } else { 3417 } else {
3418 start_i = 0; 3418 start_i = 0;
3419 length = s->inter_ac_vlc_length; 3419 length = s->inter_ac_vlc_length;
3420 last_length= s->inter_ac_vlc_last_length; 3420 last_length= s->inter_ac_vlc_last_length;
3421 } 3421 }
3422 3422
3423 if(last>=start_i){ 3423 if(last>=start_i){
3424 run=0; 3424 run=0;
3425 for(i=start_i; i<last; i++){ 3425 for(i=start_i; i<last; i++){
3426 int j= scantable[i]; 3426 int j= scantable[i];
3427 level= temp[j]; 3427 level= temp[j];
3428 3428
3429 if(level){ 3429 if(level){
3430 level+=64; 3430 level+=64;
3431 if((level&(~127)) == 0){ 3431 if((level&(~127)) == 0){
3432 bits+= length[UNI_AC_ENC_INDEX(run, level)]; 3432 bits+= length[UNI_AC_ENC_INDEX(run, level)];
3433 }else 3433 }else
3435 run=0; 3435 run=0;
3436 }else 3436 }else
3437 run++; 3437 run++;
3438 } 3438 }
3439 i= scantable[last]; 3439 i= scantable[last];
3440 3440
3441 level= temp[i] + 64; 3441 level= temp[i] + 64;
3442 3442
3443 assert(level - 64); 3443 assert(level - 64);
3444 3444
3445 if((level&(~127)) == 0){ 3445 if((level&(~127)) == 0){
3446 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; 3446 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
3447 }else 3447 }else
3448 bits+= esc_length; 3448 bits+= esc_length;
3449 3449
3450 } 3450 }
3451 3451
3452 if(last>=0){ 3452 if(last>=0){
3453 if(s->mb_intra) 3453 if(s->mb_intra)
3454 s->dct_unquantize_intra(s, temp, 0, s->qscale); 3454 s->dct_unquantize_intra(s, temp, 0, s->qscale);
3455 else 3455 else
3456 s->dct_unquantize_inter(s, temp, 0, s->qscale); 3456 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3457 } 3457 }
3458 3458
3459 s->dsp.idct_add(bak, stride, temp); 3459 s->dsp.idct_add(bak, stride, temp);
3460 3460
3461 distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8); 3461 distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
3462 3462
3463 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7); 3463 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3464 } 3464 }
3465 3465
3472 const int esc_length= s->ac_esc_length; 3472 const int esc_length= s->ac_esc_length;
3473 uint8_t * length; 3473 uint8_t * length;
3474 uint8_t * last_length; 3474 uint8_t * last_length;
3475 3475
3476 assert(h==8); 3476 assert(h==8);
3477 3477
3478 s->dsp.diff_pixels(temp, src1, src2, stride); 3478 s->dsp.diff_pixels(temp, src1, src2, stride);
3479 3479
3480 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); 3480 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3481 3481
3482 bits=0; 3482 bits=0;
3483 3483
3484 if (s->mb_intra) { 3484 if (s->mb_intra) {
3485 start_i = 1; 3485 start_i = 1;
3486 length = s->intra_ac_vlc_length; 3486 length = s->intra_ac_vlc_length;
3487 last_length= s->intra_ac_vlc_last_length; 3487 last_length= s->intra_ac_vlc_last_length;
3488 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma 3488 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3489 } else { 3489 } else {
3490 start_i = 0; 3490 start_i = 0;
3491 length = s->inter_ac_vlc_length; 3491 length = s->inter_ac_vlc_length;
3492 last_length= s->inter_ac_vlc_last_length; 3492 last_length= s->inter_ac_vlc_last_length;
3493 } 3493 }
3494 3494
3495 if(last>=start_i){ 3495 if(last>=start_i){
3496 run=0; 3496 run=0;
3497 for(i=start_i; i<last; i++){ 3497 for(i=start_i; i<last; i++){
3498 int j= scantable[i]; 3498 int j= scantable[i];
3499 level= temp[j]; 3499 level= temp[j];
3500 3500
3501 if(level){ 3501 if(level){
3502 level+=64; 3502 level+=64;
3503 if((level&(~127)) == 0){ 3503 if((level&(~127)) == 0){
3504 bits+= length[UNI_AC_ENC_INDEX(run, level)]; 3504 bits+= length[UNI_AC_ENC_INDEX(run, level)];
3505 }else 3505 }else
3507 run=0; 3507 run=0;
3508 }else 3508 }else
3509 run++; 3509 run++;
3510 } 3510 }
3511 i= scantable[last]; 3511 i= scantable[last];
3512 3512
3513 level= temp[i] + 64; 3513 level= temp[i] + 64;
3514 3514
3515 assert(level - 64); 3515 assert(level - 64);
3516 3516
3517 if((level&(~127)) == 0){ 3517 if((level&(~127)) == 0){
3518 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; 3518 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
3519 }else 3519 }else
3520 bits+= esc_length; 3520 bits+= esc_length;
3521 } 3521 }
3524 } 3524 }
3525 3525
/**
 * Vertical sum of absolute differences inside one 16 pixel wide block.
 *
 * For every pair of vertically adjacent rows (h-1 pairs in total) the
 * absolute difference of the 16 pixel pairs is added to the score.
 *
 * @param c      unused here
 * @param s      top-left pixel of the block
 * @param dummy  unused here
 * @param stride distance in bytes between two consecutive rows
 * @param h      number of rows; nothing is summed when h <= 1
 * @return the accumulated score
 */
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    const uint8_t *upper= s;
    int total= 0;
    int row;

    for(row=1; row<h; row++){
        const uint8_t * const lower= upper + stride;
        int col;

        for(col=0; col<16; col++){
            /* operands are promoted to int, so the difference may be negative */
            const int delta= upper[col] - lower[col];

            total+= delta >= 0 ? delta : -delta;
        }
        upper= lower;
    }

    return total;
}
3540 3540
/**
 * Vertical sum of absolute differences of the residual s1 - s2 over a
 * 16 pixel wide block.
 *
 * For every pair of vertically adjacent rows (h-1 pairs in total) and each
 * of the 16 columns, the value
 *   (s1[x] - s2[x]) - (s1[x+stride] - s2[x+stride])
 * is formed and its absolute value added to the score.
 *
 * @param c      unused here
 * @param s1     top-left pixel of the first block
 * @param s2     top-left pixel of the second block
 * @param stride distance in bytes between two consecutive rows (both blocks)
 * @param h      number of rows; nothing is summed when h <= 1
 * @return the accumulated score
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total= 0;
    int row;

    for(row=1; row<h; row++, s1+= stride, s2+= stride){
        const uint8_t * const below1= s1 + stride;
        const uint8_t * const below2= s2 + stride;
        int col;

        for(col=0; col<16; col++){
            const int delta= s1[col] - s2[col] - below1[col] + below2[col];

            total+= delta >= 0 ? delta : -delta;
        }
    }

    return total;
}
3555 3555
3556 #define SQ(a) ((a)*(a)) 3556 #define SQ(a) ((a)*(a))
/**
 * Vertical sum of squared differences inside one 16 pixel wide block.
 *
 * For every pair of vertically adjacent rows (h-1 pairs in total) the
 * squared difference of the 16 pixel pairs is added to the score.
 *
 * @param c      unused here
 * @param s      top-left pixel of the block
 * @param dummy  unused here
 * @param stride distance in bytes between two consecutive rows
 * @param h      number of rows; nothing is summed when h <= 1
 * @return the accumulated score
 */
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    const uint8_t *upper= s;
    int total= 0;
    int row;

    for(row=1; row<h; row++){
        const uint8_t * const lower= upper + stride;
        int col;

        for(col=0; col<16; col++){
            /* same value SQ() would produce for this difference */
            const int delta= upper[col] - lower[col];

            total+= delta*delta;
        }
        upper= lower;
    }

    return total;
}
3571 3571
/**
 * Vertical sum of squared differences of the residual s1 - s2 over a
 * 16 pixel wide block.
 *
 * For every pair of vertically adjacent rows (h-1 pairs in total) and each
 * of the 16 columns, the value
 *   (s1[x] - s2[x]) - (s1[x+stride] - s2[x+stride])
 * is formed and its square added to the score.
 *
 * @param c      unused here
 * @param s1     top-left pixel of the first block
 * @param s2     top-left pixel of the second block
 * @param stride distance in bytes between two consecutive rows (both blocks)
 * @param h      number of rows; nothing is summed when h <= 1
 * @return the accumulated score
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total= 0;
    int row;

    for(row=1; row<h; row++, s1+= stride, s2+= stride){
        const uint8_t * const below1= s1 + stride;
        const uint8_t * const below2= s2 + stride;
        int col;

        for(col=0; col<16; col++){
            /* same value SQ() would produce for this expression */
            const int delta= s1[col] - s2[col] - below1[col] + below2[col];

            total+= delta*delta;
        }
    }

    return total;
}
3586 3586
3587 WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) 3587 WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
3588 WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) 3588 WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
3648 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; 3648 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
3649 for(i=0;i<MAX_NEG_CROP;i++) { 3649 for(i=0;i<MAX_NEG_CROP;i++) {
3650 cropTbl[i] = 0; 3650 cropTbl[i] = 0;
3651 cropTbl[i + MAX_NEG_CROP + 256] = 255; 3651 cropTbl[i + MAX_NEG_CROP + 256] = 255;
3652 } 3652 }
3653 3653
3654 for(i=0;i<512;i++) { 3654 for(i=0;i<512;i++) {
3655 squareTbl[i] = (i - 256) * (i - 256); 3655 squareTbl[i] = (i - 256) * (i - 256);
3656 } 3656 }
3657 3657
3658 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; 3658 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
3659 } 3659 }
3660 3660
3661 3661
3662 void dsputil_init(DSPContext* c, AVCodecContext *avctx) 3662 void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3665 3665
3666 #ifdef CONFIG_ENCODERS 3666 #ifdef CONFIG_ENCODERS
3667 if(avctx->dct_algo==FF_DCT_FASTINT) { 3667 if(avctx->dct_algo==FF_DCT_FASTINT) {
3668 c->fdct = fdct_ifast; 3668 c->fdct = fdct_ifast;
3669 c->fdct248 = fdct_ifast248; 3669 c->fdct248 = fdct_ifast248;
3670 } 3670 }
3671 else if(avctx->dct_algo==FF_DCT_FAAN) { 3671 else if(avctx->dct_algo==FF_DCT_FAAN) {
3672 c->fdct = ff_faandct; 3672 c->fdct = ff_faandct;
3673 c->fdct248 = ff_faandct248; 3673 c->fdct248 = ff_faandct248;
3674 } 3674 }
3675 else { 3675 else {
3676 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default 3676 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
3677 c->fdct248 = ff_fdct248_islow; 3677 c->fdct248 = ff_fdct248_islow;
3678 } 3678 }
3679 #endif //CONFIG_ENCODERS 3679 #endif //CONFIG_ENCODERS
3859 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; 3859 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
3860 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; 3860 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
3861 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; 3861 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
3862 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; 3862 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
3863 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; 3863 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
3864 3864
3865 #define SET_CMP_FUNC(name) \ 3865 #define SET_CMP_FUNC(name) \
3866 c->name[0]= name ## 16_c;\ 3866 c->name[0]= name ## 16_c;\
3867 c->name[1]= name ## 8x8_c; 3867 c->name[1]= name ## 8x8_c;
3868 3868
3869 SET_CMP_FUNC(hadamard8_diff) 3869 SET_CMP_FUNC(hadamard8_diff)
3870 c->hadamard8_diff[4]= hadamard8_intra16_c; 3870 c->hadamard8_diff[4]= hadamard8_intra16_c;
3871 SET_CMP_FUNC(dct_sad) 3871 SET_CMP_FUNC(dct_sad)
3872 SET_CMP_FUNC(dct_max) 3872 SET_CMP_FUNC(dct_max)
3873 c->sad[0]= pix_abs16_c; 3873 c->sad[0]= pix_abs16_c;
3898 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; 3898 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
3899 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; 3899 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
3900 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; 3900 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
3901 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c; 3901 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
3902 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c; 3902 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
3903 3903
3904 c->h263_h_loop_filter= h263_h_loop_filter_c; 3904 c->h263_h_loop_filter= h263_h_loop_filter_c;
3905 c->h263_v_loop_filter= h263_v_loop_filter_c; 3905 c->h263_v_loop_filter= h263_v_loop_filter_c;
3906 3906
3907 c->h261_loop_filter= h261_loop_filter_c; 3907 c->h261_loop_filter= h261_loop_filter_c;
3908 3908
3909 c->try_8x8basis= try_8x8basis_c; 3909 c->try_8x8basis= try_8x8basis_c;
3910 c->add_8x8basis= add_8x8basis_c; 3910 c->add_8x8basis= add_8x8basis_c;
3911 3911
3912 #ifdef HAVE_MMX 3912 #ifdef HAVE_MMX
3913 dsputil_init_mmx(c, avctx); 3913 dsputil_init_mmx(c, avctx);