Mercurial > libavcodec.hg
comparison simple_idct.c @ 1008:fb6cbb8a04a3 libavcodec
fixing DCTELEM != short
author | michaelni |
---|---|
date | Wed, 15 Jan 2003 19:21:21 +0000 |
parents | caa77cd960c0 |
children | b32afefe7d33 |
comparison
equal
deleted
inserted
replaced
1007:b2cf2a1d9a51 | 1008:fb6cbb8a04a3 |
---|---|
65 /* signed 16x16 -> 32 multiply */ | 65 /* signed 16x16 -> 32 multiply */ |
66 #define MUL16(rt, ra, rb) rt = (ra) * (rb) | 66 #define MUL16(rt, ra, rb) rt = (ra) * (rb) |
67 | 67 |
68 #endif | 68 #endif |
69 | 69 |
70 static inline void idctRowCondDC (int16_t * row) | 70 static inline void idctRowCondDC (DCTELEM * row) |
71 { | 71 { |
72 int a0, a1, a2, a3, b0, b1, b2, b3; | 72 int a0, a1, a2, a3, b0, b1, b2, b3; |
73 #ifdef FAST_64BIT | 73 #ifdef FAST_64BIT |
74 uint64_t temp; | 74 uint64_t temp; |
75 #else | 75 #else |
80 #ifdef WORDS_BIGENDIAN | 80 #ifdef WORDS_BIGENDIAN |
81 #define ROW0_MASK 0xffff000000000000LL | 81 #define ROW0_MASK 0xffff000000000000LL |
82 #else | 82 #else |
83 #define ROW0_MASK 0xffffLL | 83 #define ROW0_MASK 0xffffLL |
84 #endif | 84 #endif |
85 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | | 85 if(sizeof(DCTELEM)==2){ |
86 ((uint64_t *)row)[1]) == 0) { | 86 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | |
87 temp = (row[0] << 3) & 0xffff; | 87 ((uint64_t *)row)[1]) == 0) { |
88 temp += temp << 16; | 88 temp = (row[0] << 3) & 0xffff; |
89 temp += temp << 32; | 89 temp += temp << 16; |
90 ((uint64_t *)row)[0] = temp; | 90 temp += temp << 32; |
91 ((uint64_t *)row)[1] = temp; | 91 ((uint64_t *)row)[0] = temp; |
92 return; | 92 ((uint64_t *)row)[1] = temp; |
93 } | 93 return; |
94 } | |
95 }else{ | |
96 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { | |
97 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; | |
98 return; | |
99 } | |
100 } | |
94 #else | 101 #else |
95 if (!(((uint32_t*)row)[1] | | 102 if(sizeof(DCTELEM)==2){ |
96 ((uint32_t*)row)[2] | | 103 if (!(((uint32_t*)row)[1] | |
97 ((uint32_t*)row)[3] | | 104 ((uint32_t*)row)[2] | |
98 row[1])) { | 105 ((uint32_t*)row)[3] | |
99 temp = (row[0] << 3) & 0xffff; | 106 row[1])) { |
100 temp += temp << 16; | 107 temp = (row[0] << 3) & 0xffff; |
101 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = | 108 temp += temp << 16; |
102 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; | 109 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = |
103 return; | 110 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; |
104 } | 111 return; |
112 } | |
113 }else{ | |
114 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { | |
115 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; | |
116 return; | |
117 } | |
118 } | |
105 #endif | 119 #endif |
106 | 120 |
107 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); | 121 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
108 a1 = a0; | 122 a1 = a0; |
109 a2 = a0; | 123 a2 = a0; |
157 row[3] = (a3 + b3) >> ROW_SHIFT; | 171 row[3] = (a3 + b3) >> ROW_SHIFT; |
158 row[4] = (a3 - b3) >> ROW_SHIFT; | 172 row[4] = (a3 - b3) >> ROW_SHIFT; |
159 } | 173 } |
160 | 174 |
161 static inline void idctSparseColPut (UINT8 *dest, int line_size, | 175 static inline void idctSparseColPut (UINT8 *dest, int line_size, |
162 int16_t * col) | 176 DCTELEM * col) |
163 { | 177 { |
164 int a0, a1, a2, a3, b0, b1, b2, b3; | 178 int a0, a1, a2, a3, b0, b1, b2, b3; |
165 UINT8 *cm = cropTbl + MAX_NEG_CROP; | 179 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
166 | 180 |
167 /* XXX: I did that only to give same values as previous code */ | 181 /* XXX: I did that only to give same values as previous code */ |
229 dest += line_size; | 243 dest += line_size; |
230 dest[0] = cm[(a0 - b0) >> COL_SHIFT]; | 244 dest[0] = cm[(a0 - b0) >> COL_SHIFT]; |
231 } | 245 } |
232 | 246 |
233 static inline void idctSparseColAdd (UINT8 *dest, int line_size, | 247 static inline void idctSparseColAdd (UINT8 *dest, int line_size, |
234 int16_t * col) | 248 DCTELEM * col) |
235 { | 249 { |
236 int a0, a1, a2, a3, b0, b1, b2, b3; | 250 int a0, a1, a2, a3, b0, b1, b2, b3; |
237 UINT8 *cm = cropTbl + MAX_NEG_CROP; | 251 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
238 | 252 |
239 /* XXX: I did that only to give same values as previous code */ | 253 /* XXX: I did that only to give same values as previous code */ |
300 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; | 314 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; |
301 dest += line_size; | 315 dest += line_size; |
302 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; | 316 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; |
303 } | 317 } |
304 | 318 |
305 static inline void idctSparseCol (int16_t * col) | 319 static inline void idctSparseCol (DCTELEM * col) |
306 { | 320 { |
307 int a0, a1, a2, a3, b0, b1, b2, b3; | 321 int a0, a1, a2, a3, b0, b1, b2, b3; |
308 | 322 |
309 /* XXX: I did that only to give same values as previous code */ | 323 /* XXX: I did that only to give same values as previous code */ |
310 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | 324 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
363 col[40] = ((a2 - b2) >> COL_SHIFT); | 377 col[40] = ((a2 - b2) >> COL_SHIFT); |
364 col[48] = ((a1 - b1) >> COL_SHIFT); | 378 col[48] = ((a1 - b1) >> COL_SHIFT); |
365 col[56] = ((a0 - b0) >> COL_SHIFT); | 379 col[56] = ((a0 - b0) >> COL_SHIFT); |
366 } | 380 } |
367 | 381 |
368 void simple_idct_put(UINT8 *dest, int line_size, INT16 *block) | 382 void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block) |
369 { | 383 { |
370 int i; | 384 int i; |
371 for(i=0; i<8; i++) | 385 for(i=0; i<8; i++) |
372 idctRowCondDC(block + i*8); | 386 idctRowCondDC(block + i*8); |
373 | 387 |
374 for(i=0; i<8; i++) | 388 for(i=0; i<8; i++) |
375 idctSparseColPut(dest + i, line_size, block + i); | 389 idctSparseColPut(dest + i, line_size, block + i); |
376 } | 390 } |
377 | 391 |
378 void simple_idct_add(UINT8 *dest, int line_size, INT16 *block) | 392 void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block) |
379 { | 393 { |
380 int i; | 394 int i; |
381 for(i=0; i<8; i++) | 395 for(i=0; i<8; i++) |
382 idctRowCondDC(block + i*8); | 396 idctRowCondDC(block + i*8); |
383 | 397 |
384 for(i=0; i<8; i++) | 398 for(i=0; i<8; i++) |
385 idctSparseColAdd(dest + i, line_size, block + i); | 399 idctSparseColAdd(dest + i, line_size, block + i); |
386 } | 400 } |
387 | 401 |
388 void simple_idct(INT16 *block) | 402 void simple_idct(DCTELEM *block) |
389 { | 403 { |
390 int i; | 404 int i; |
391 for(i=0; i<8; i++) | 405 for(i=0; i<8; i++) |
392 idctRowCondDC(block + i*8); | 406 idctRowCondDC(block + i*8); |
393 | 407 |
404 | 418 |
405 /* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized, | 419 /* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized, |
406 and the butterfly must be multiplied by 0.5 * sqrt(2.0) */ | 420 and the butterfly must be multiplied by 0.5 * sqrt(2.0) */ |
407 #define C_SHIFT (4+1+12) | 421 #define C_SHIFT (4+1+12) |
408 | 422 |
409 static inline void idct4col(UINT8 *dest, int line_size, const INT16 *col) | 423 static inline void idct4col(UINT8 *dest, int line_size, const DCTELEM *col) |
410 { | 424 { |
411 int c0, c1, c2, c3, a0, a1, a2, a3; | 425 int c0, c1, c2, c3, a0, a1, a2, a3; |
412 const UINT8 *cm = cropTbl + MAX_NEG_CROP; | 426 const UINT8 *cm = cropTbl + MAX_NEG_CROP; |
413 | 427 |
414 a0 = col[8*0]; | 428 a0 = col[8*0]; |
441 to the pixels before clamping to avoid systematic error | 455 to the pixels before clamping to avoid systematic error |
442 (1024*sqrt(2)) offset would be needed otherwise. */ | 456 (1024*sqrt(2)) offset would be needed otherwise. */ |
443 /* XXX: I think a 1.0/sqrt(2) normalization should be needed to | 457 /* XXX: I think a 1.0/sqrt(2) normalization should be needed to |
444 compensate the extra butterfly stage - I don't have the full DV | 458 compensate the extra butterfly stage - I don't have the full DV |
445 specification */ | 459 specification */ |
446 void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block) | 460 void simple_idct248_put(UINT8 *dest, int line_size, DCTELEM *block) |
447 { | 461 { |
448 int i; | 462 int i; |
449 INT16 *ptr; | 463 DCTELEM *ptr; |
450 | 464 |
451 /* butterfly */ | 465 /* butterfly */ |
452 ptr = block; | 466 ptr = block; |
453 for(i=0;i<4;i++) { | 467 for(i=0;i<4;i++) { |
454 BF(0); | 468 BF(0); |
484 #define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5)) | 498 #define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5)) |
485 #define C1 C_FIX(0.6532814824) | 499 #define C1 C_FIX(0.6532814824) |
486 #define C2 C_FIX(0.2705980501) | 500 #define C2 C_FIX(0.2705980501) |
487 #define C3 C_FIX(0.5) | 501 #define C3 C_FIX(0.5) |
488 #define C_SHIFT (4+1+12) | 502 #define C_SHIFT (4+1+12) |
489 static inline void idct4col_add(UINT8 *dest, int line_size, const INT16 *col) | 503 static inline void idct4col_add(UINT8 *dest, int line_size, const DCTELEM *col) |
490 { | 504 { |
491 int c0, c1, c2, c3, a0, a1, a2, a3; | 505 int c0, c1, c2, c3, a0, a1, a2, a3; |
492 const UINT8 *cm = cropTbl + MAX_NEG_CROP; | 506 const UINT8 *cm = cropTbl + MAX_NEG_CROP; |
493 | 507 |
494 a0 = col[8*0]; | 508 a0 = col[8*0]; |
512 #define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5)) | 526 #define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5)) |
513 #define R1 R_FIX(0.6532814824) | 527 #define R1 R_FIX(0.6532814824) |
514 #define R2 R_FIX(0.2705980501) | 528 #define R2 R_FIX(0.2705980501) |
515 #define R3 R_FIX(0.5) | 529 #define R3 R_FIX(0.5) |
516 #define R_SHIFT 11 | 530 #define R_SHIFT 11 |
517 static inline void idct4row(INT16 *row) | 531 static inline void idct4row(DCTELEM *row) |
518 { | 532 { |
519 int c0, c1, c2, c3, a0, a1, a2, a3; | 533 int c0, c1, c2, c3, a0, a1, a2, a3; |
520 const UINT8 *cm = cropTbl + MAX_NEG_CROP; | 534 const UINT8 *cm = cropTbl + MAX_NEG_CROP; |
521 | 535 |
522 a0 = row[0]; | 536 a0 = row[0]; |
531 row[1]= (c2 + c3) >> R_SHIFT; | 545 row[1]= (c2 + c3) >> R_SHIFT; |
532 row[2]= (c2 - c3) >> R_SHIFT; | 546 row[2]= (c2 - c3) >> R_SHIFT; |
533 row[3]= (c0 - c1) >> R_SHIFT; | 547 row[3]= (c0 - c1) >> R_SHIFT; |
534 } | 548 } |
535 | 549 |
536 void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block) | 550 void simple_idct84_add(UINT8 *dest, int line_size, DCTELEM *block) |
537 { | 551 { |
538 int i; | 552 int i; |
539 | 553 |
540 /* IDCT8 on each line */ | 554 /* IDCT8 on each line */ |
541 for(i=0; i<4; i++) { | 555 for(i=0; i<4; i++) { |
546 for(i=0;i<8;i++) { | 560 for(i=0;i<8;i++) { |
547 idct4col_add(dest + i, line_size, block + i); | 561 idct4col_add(dest + i, line_size, block + i); |
548 } | 562 } |
549 } | 563 } |
550 | 564 |
551 void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block) | 565 void simple_idct48_add(UINT8 *dest, int line_size, DCTELEM *block) |
552 { | 566 { |
553 int i; | 567 int i; |
554 | 568 |
555 /* IDCT4 on each line */ | 569 /* IDCT4 on each line */ |
556 for(i=0; i<8; i++) { | 570 for(i=0; i<8; i++) { |