comparison simple_idct.c @ 1008:fb6cbb8a04a3 libavcodec

fixing DCTELEM != short
author michaelni
date Wed, 15 Jan 2003 19:21:21 +0000
parents caa77cd960c0
children b32afefe7d33
comparison
equal deleted inserted replaced
1007:b2cf2a1d9a51 1008:fb6cbb8a04a3
65 /* signed 16x16 -> 32 multiply */ 65 /* signed 16x16 -> 32 multiply */
66 #define MUL16(rt, ra, rb) rt = (ra) * (rb) 66 #define MUL16(rt, ra, rb) rt = (ra) * (rb)
67 67
68 #endif 68 #endif
69 69
70 static inline void idctRowCondDC (int16_t * row) 70 static inline void idctRowCondDC (DCTELEM * row)
71 { 71 {
72 int a0, a1, a2, a3, b0, b1, b2, b3; 72 int a0, a1, a2, a3, b0, b1, b2, b3;
73 #ifdef FAST_64BIT 73 #ifdef FAST_64BIT
74 uint64_t temp; 74 uint64_t temp;
75 #else 75 #else
80 #ifdef WORDS_BIGENDIAN 80 #ifdef WORDS_BIGENDIAN
81 #define ROW0_MASK 0xffff000000000000LL 81 #define ROW0_MASK 0xffff000000000000LL
82 #else 82 #else
83 #define ROW0_MASK 0xffffLL 83 #define ROW0_MASK 0xffffLL
84 #endif 84 #endif
85 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | 85 if(sizeof(DCTELEM)==2){
86 ((uint64_t *)row)[1]) == 0) { 86 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
87 temp = (row[0] << 3) & 0xffff; 87 ((uint64_t *)row)[1]) == 0) {
88 temp += temp << 16; 88 temp = (row[0] << 3) & 0xffff;
89 temp += temp << 32; 89 temp += temp << 16;
90 ((uint64_t *)row)[0] = temp; 90 temp += temp << 32;
91 ((uint64_t *)row)[1] = temp; 91 ((uint64_t *)row)[0] = temp;
92 return; 92 ((uint64_t *)row)[1] = temp;
93 } 93 return;
94 }
95 }else{
96 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
97 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
98 return;
99 }
100 }
94 #else 101 #else
95 if (!(((uint32_t*)row)[1] | 102 if(sizeof(DCTELEM)==2){
96 ((uint32_t*)row)[2] | 103 if (!(((uint32_t*)row)[1] |
97 ((uint32_t*)row)[3] | 104 ((uint32_t*)row)[2] |
98 row[1])) { 105 ((uint32_t*)row)[3] |
99 temp = (row[0] << 3) & 0xffff; 106 row[1])) {
100 temp += temp << 16; 107 temp = (row[0] << 3) & 0xffff;
101 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = 108 temp += temp << 16;
102 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; 109 ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
103 return; 110 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
104 } 111 return;
112 }
113 }else{
114 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
115 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
116 return;
117 }
118 }
105 #endif 119 #endif
106 120
107 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); 121 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
108 a1 = a0; 122 a1 = a0;
109 a2 = a0; 123 a2 = a0;
157 row[3] = (a3 + b3) >> ROW_SHIFT; 171 row[3] = (a3 + b3) >> ROW_SHIFT;
158 row[4] = (a3 - b3) >> ROW_SHIFT; 172 row[4] = (a3 - b3) >> ROW_SHIFT;
159 } 173 }
160 174
161 static inline void idctSparseColPut (UINT8 *dest, int line_size, 175 static inline void idctSparseColPut (UINT8 *dest, int line_size,
162 int16_t * col) 176 DCTELEM * col)
163 { 177 {
164 int a0, a1, a2, a3, b0, b1, b2, b3; 178 int a0, a1, a2, a3, b0, b1, b2, b3;
165 UINT8 *cm = cropTbl + MAX_NEG_CROP; 179 UINT8 *cm = cropTbl + MAX_NEG_CROP;
166 180
167 /* XXX: I did that only to give same values as previous code */ 181 /* XXX: I did that only to give same values as previous code */
229 dest += line_size; 243 dest += line_size;
230 dest[0] = cm[(a0 - b0) >> COL_SHIFT]; 244 dest[0] = cm[(a0 - b0) >> COL_SHIFT];
231 } 245 }
232 246
233 static inline void idctSparseColAdd (UINT8 *dest, int line_size, 247 static inline void idctSparseColAdd (UINT8 *dest, int line_size,
234 int16_t * col) 248 DCTELEM * col)
235 { 249 {
236 int a0, a1, a2, a3, b0, b1, b2, b3; 250 int a0, a1, a2, a3, b0, b1, b2, b3;
237 UINT8 *cm = cropTbl + MAX_NEG_CROP; 251 UINT8 *cm = cropTbl + MAX_NEG_CROP;
238 252
239 /* XXX: I did that only to give same values as previous code */ 253 /* XXX: I did that only to give same values as previous code */
300 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; 314 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];
301 dest += line_size; 315 dest += line_size;
302 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; 316 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
303 } 317 }
304 318
305 static inline void idctSparseCol (int16_t * col) 319 static inline void idctSparseCol (DCTELEM * col)
306 { 320 {
307 int a0, a1, a2, a3, b0, b1, b2, b3; 321 int a0, a1, a2, a3, b0, b1, b2, b3;
308 322
309 /* XXX: I did that only to give same values as previous code */ 323 /* XXX: I did that only to give same values as previous code */
310 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); 324 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
363 col[40] = ((a2 - b2) >> COL_SHIFT); 377 col[40] = ((a2 - b2) >> COL_SHIFT);
364 col[48] = ((a1 - b1) >> COL_SHIFT); 378 col[48] = ((a1 - b1) >> COL_SHIFT);
365 col[56] = ((a0 - b0) >> COL_SHIFT); 379 col[56] = ((a0 - b0) >> COL_SHIFT);
366 } 380 }
367 381
368 void simple_idct_put(UINT8 *dest, int line_size, INT16 *block) 382 void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
369 { 383 {
370 int i; 384 int i;
371 for(i=0; i<8; i++) 385 for(i=0; i<8; i++)
372 idctRowCondDC(block + i*8); 386 idctRowCondDC(block + i*8);
373 387
374 for(i=0; i<8; i++) 388 for(i=0; i<8; i++)
375 idctSparseColPut(dest + i, line_size, block + i); 389 idctSparseColPut(dest + i, line_size, block + i);
376 } 390 }
377 391
378 void simple_idct_add(UINT8 *dest, int line_size, INT16 *block) 392 void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
379 { 393 {
380 int i; 394 int i;
381 for(i=0; i<8; i++) 395 for(i=0; i<8; i++)
382 idctRowCondDC(block + i*8); 396 idctRowCondDC(block + i*8);
383 397
384 for(i=0; i<8; i++) 398 for(i=0; i<8; i++)
385 idctSparseColAdd(dest + i, line_size, block + i); 399 idctSparseColAdd(dest + i, line_size, block + i);
386 } 400 }
387 401
388 void simple_idct(INT16 *block) 402 void simple_idct(DCTELEM *block)
389 { 403 {
390 int i; 404 int i;
391 for(i=0; i<8; i++) 405 for(i=0; i<8; i++)
392 idctRowCondDC(block + i*8); 406 idctRowCondDC(block + i*8);
393 407
404 418
405 /* row idct is multiple by 16 * sqrt(2.0), col idct4 is normalized, 419 /* row idct is multiple by 16 * sqrt(2.0), col idct4 is normalized,
406 and the butterfly must be multiplied by 0.5 * sqrt(2.0) */ 420 and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
407 #define C_SHIFT (4+1+12) 421 #define C_SHIFT (4+1+12)
408 422
409 static inline void idct4col(UINT8 *dest, int line_size, const INT16 *col) 423 static inline void idct4col(UINT8 *dest, int line_size, const DCTELEM *col)
410 { 424 {
411 int c0, c1, c2, c3, a0, a1, a2, a3; 425 int c0, c1, c2, c3, a0, a1, a2, a3;
412 const UINT8 *cm = cropTbl + MAX_NEG_CROP; 426 const UINT8 *cm = cropTbl + MAX_NEG_CROP;
413 427
414 a0 = col[8*0]; 428 a0 = col[8*0];
441 to the pixels before clamping to avoid systematic error 455 to the pixels before clamping to avoid systematic error
442 (1024*sqrt(2)) offset would be needed otherwise. */ 456 (1024*sqrt(2)) offset would be needed otherwise. */
443 /* XXX: I think a 1.0/sqrt(2) normalization should be needed to 457 /* XXX: I think a 1.0/sqrt(2) normalization should be needed to
444 compensate the extra butterfly stage - I don't have the full DV 458 compensate the extra butterfly stage - I don't have the full DV
445 specification */ 459 specification */
446 void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block) 460 void simple_idct248_put(UINT8 *dest, int line_size, DCTELEM *block)
447 { 461 {
448 int i; 462 int i;
449 INT16 *ptr; 463 DCTELEM *ptr;
450 464
451 /* butterfly */ 465 /* butterfly */
452 ptr = block; 466 ptr = block;
453 for(i=0;i<4;i++) { 467 for(i=0;i<4;i++) {
454 BF(0); 468 BF(0);
484 #define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5)) 498 #define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
485 #define C1 C_FIX(0.6532814824) 499 #define C1 C_FIX(0.6532814824)
486 #define C2 C_FIX(0.2705980501) 500 #define C2 C_FIX(0.2705980501)
487 #define C3 C_FIX(0.5) 501 #define C3 C_FIX(0.5)
488 #define C_SHIFT (4+1+12) 502 #define C_SHIFT (4+1+12)
489 static inline void idct4col_add(UINT8 *dest, int line_size, const INT16 *col) 503 static inline void idct4col_add(UINT8 *dest, int line_size, const DCTELEM *col)
490 { 504 {
491 int c0, c1, c2, c3, a0, a1, a2, a3; 505 int c0, c1, c2, c3, a0, a1, a2, a3;
492 const UINT8 *cm = cropTbl + MAX_NEG_CROP; 506 const UINT8 *cm = cropTbl + MAX_NEG_CROP;
493 507
494 a0 = col[8*0]; 508 a0 = col[8*0];
512 #define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5)) 526 #define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
513 #define R1 R_FIX(0.6532814824) 527 #define R1 R_FIX(0.6532814824)
514 #define R2 R_FIX(0.2705980501) 528 #define R2 R_FIX(0.2705980501)
515 #define R3 R_FIX(0.5) 529 #define R3 R_FIX(0.5)
516 #define R_SHIFT 11 530 #define R_SHIFT 11
517 static inline void idct4row(INT16 *row) 531 static inline void idct4row(DCTELEM *row)
518 { 532 {
519 int c0, c1, c2, c3, a0, a1, a2, a3; 533 int c0, c1, c2, c3, a0, a1, a2, a3;
520 const UINT8 *cm = cropTbl + MAX_NEG_CROP; 534 const UINT8 *cm = cropTbl + MAX_NEG_CROP;
521 535
522 a0 = row[0]; 536 a0 = row[0];
531 row[1]= (c2 + c3) >> R_SHIFT; 545 row[1]= (c2 + c3) >> R_SHIFT;
532 row[2]= (c2 - c3) >> R_SHIFT; 546 row[2]= (c2 - c3) >> R_SHIFT;
533 row[3]= (c0 - c1) >> R_SHIFT; 547 row[3]= (c0 - c1) >> R_SHIFT;
534 } 548 }
535 549
536 void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block) 550 void simple_idct84_add(UINT8 *dest, int line_size, DCTELEM *block)
537 { 551 {
538 int i; 552 int i;
539 553
540 /* IDCT8 on each line */ 554 /* IDCT8 on each line */
541 for(i=0; i<4; i++) { 555 for(i=0; i<4; i++) {
546 for(i=0;i<8;i++) { 560 for(i=0;i<8;i++) {
547 idct4col_add(dest + i, line_size, block + i); 561 idct4col_add(dest + i, line_size, block + i);
548 } 562 }
549 } 563 }
550 564
551 void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block) 565 void simple_idct48_add(UINT8 *dest, int line_size, DCTELEM *block)
552 { 566 {
553 int i; 567 int i;
554 568
555 /* IDCT4 on each line */ 569 /* IDCT4 on each line */
556 for(i=0; i<8; i++) { 570 for(i=0; i<8; i++) {