Mercurial > libavcodec.hg
comparison ppc/mpegvideo_altivec.c @ 2967:ef2149182f1c libavcodec
COSMETICS: Remove all trailing whitespace.
author | diego |
---|---|
date | Sat, 17 Dec 2005 18:14:38 +0000 |
parents | 9241d99f7443 |
children | bfabfdf9ce55 |
comparison
equal
deleted
inserted
replaced
2966:564788471dd4 | 2967:ef2149182f1c |
---|---|
23 #include <stdio.h> | 23 #include <stdio.h> |
24 #include "../dsputil.h" | 24 #include "../dsputil.h" |
25 #include "../mpegvideo.h" | 25 #include "../mpegvideo.h" |
26 | 26 |
27 #include "gcc_fixes.h" | 27 #include "gcc_fixes.h" |
28 | 28 |
29 #include "dsputil_altivec.h" | 29 #include "dsputil_altivec.h" |
30 | 30 |
31 // Swaps two variables (used for altivec registers) | 31 // Swaps two variables (used for altivec registers) |
32 #define SWAP(a,b) \ | 32 #define SWAP(a,b) \ |
33 do { \ | 33 do { \ |
101 #define FOUROF(a) (a) | 101 #define FOUROF(a) (a) |
102 #else | 102 #else |
103 // slower, for dumb non-apple GCC | 103 // slower, for dumb non-apple GCC |
104 #define FOUROF(a) {a,a,a,a} | 104 #define FOUROF(a) {a,a,a,a} |
105 #endif | 105 #endif |
106 int dct_quantize_altivec(MpegEncContext* s, | 106 int dct_quantize_altivec(MpegEncContext* s, |
107 DCTELEM* data, int n, | 107 DCTELEM* data, int n, |
108 int qscale, int* overflow) | 108 int qscale, int* overflow) |
109 { | 109 { |
110 int lastNonZero; | 110 int lastNonZero; |
111 vector float row0, row1, row2, row3, row4, row5, row6, row7; | 111 vector float row0, row1, row2, row3, row4, row5, row6, row7; |
271 } | 271 } |
272 | 272 |
273 if (whichPass == 1) | 273 if (whichPass == 1) |
274 { | 274 { |
275 // transpose the data for the second pass | 275 // transpose the data for the second pass |
276 | 276 |
277 // First, block transpose the upper right with lower left. | 277 // First, block transpose the upper right with lower left. |
278 SWAP(row4, alt0); | 278 SWAP(row4, alt0); |
279 SWAP(row5, alt1); | 279 SWAP(row5, alt1); |
280 SWAP(row6, alt2); | 280 SWAP(row6, alt2); |
281 SWAP(row7, alt3); | 281 SWAP(row7, alt3); |
378 vec_cmpgt(alt6, zero)); | 378 vec_cmpgt(alt6, zero)); |
379 alt7 = vec_sel(vec_madd(alt7, q7, negBias), vec_madd(alt7, q7, bias), | 379 alt7 = vec_sel(vec_madd(alt7, q7, negBias), vec_madd(alt7, q7, bias), |
380 vec_cmpgt(alt7, zero)); | 380 vec_cmpgt(alt7, zero)); |
381 } | 381 } |
382 | 382 |
383 | 383 |
384 } | 384 } |
385 | 385 |
386 // Store the data back into the original block | 386 // Store the data back into the original block |
387 { | 387 { |
388 vector signed short data0, data1, data2, data3, data4, data5, data6, data7; | 388 vector signed short data0, data1, data2, data3, data4, data5, data6, data7; |
467 | 467 |
468 | 468 |
469 vec_ste(scanIndices_01, 0, &lastNonZeroChar); | 469 vec_ste(scanIndices_01, 0, &lastNonZeroChar); |
470 | 470 |
471 lastNonZero = lastNonZeroChar; | 471 lastNonZero = lastNonZeroChar; |
472 | 472 |
473 // While the data is still in vectors we check for the transpose IDCT permute | 473 // While the data is still in vectors we check for the transpose IDCT permute |
474 // and handle it using the vector unit if we can. This is the permute used | 474 // and handle it using the vector unit if we can. This is the permute used |
475 // by the altivec idct, so it is common when using the altivec dct. | 475 // by the altivec idct, so it is common when using the altivec dct. |
476 | 476 |
477 if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) | 477 if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) |
521 | 521 |
522 /* | 522 /* |
523 AltiVec version of dct_unquantize_h263 | 523 AltiVec version of dct_unquantize_h263 |
524 this code assumes `block' is 16 bytes-aligned | 524 this code assumes `block' is 16 bytes-aligned |
525 */ | 525 */ |
526 void dct_unquantize_h263_altivec(MpegEncContext *s, | 526 void dct_unquantize_h263_altivec(MpegEncContext *s, |
527 DCTELEM *block, int n, int qscale) | 527 DCTELEM *block, int n, int qscale) |
528 { | 528 { |
529 POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1); | 529 POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1); |
530 int i, level, qmul, qadd; | 530 int i, level, qmul, qadd; |
531 int nCoeffs; | 531 int nCoeffs; |
532 | 532 |
533 assert(s->block_last_index[n]>=0); | 533 assert(s->block_last_index[n]>=0); |
534 | 534 |
535 POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); | 535 POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); |
536 | 536 |
537 qadd = (qscale - 1) | 1; | 537 qadd = (qscale - 1) | 1; |
538 qmul = qscale << 1; | 538 qmul = qscale << 1; |
539 | 539 |
540 if (s->mb_intra) { | 540 if (s->mb_intra) { |
541 if (!s->h263_aic) { | 541 if (!s->h263_aic) { |
542 if (n < 4) | 542 if (n < 4) |
543 block[0] = block[0] * s->y_dc_scale; | 543 block[0] = block[0] * s->y_dc_scale; |
544 else | 544 else |
545 block[0] = block[0] * s->c_dc_scale; | 545 block[0] = block[0] * s->c_dc_scale; |
546 }else | 546 }else |
547 qadd = 0; | 547 qadd = 0; |
548 i = 1; | 548 i = 1; |
549 nCoeffs= 63; //does not allways use zigzag table | 549 nCoeffs= 63; //does not allways use zigzag table |
550 } else { | 550 } else { |
551 i = 0; | 551 i = 0; |
552 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; | 552 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; |
553 } | 553 } |
554 | 554 |
584 }; | 584 }; |
585 register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; | 585 register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; |
586 register vector bool short blockv_null, blockv_neg; | 586 register vector bool short blockv_null, blockv_neg; |
587 register short backup_0 = block[0]; | 587 register short backup_0 = block[0]; |
588 register int j = 0; | 588 register int j = 0; |
589 | 589 |
590 qmulv = vec_ld(0, qmul8); | 590 qmulv = vec_ld(0, qmul8); |
591 qaddv = vec_ld(0, qadd8); | 591 qaddv = vec_ld(0, qadd8); |
592 nqaddv = vec_ld(0, nqadd8); | 592 nqaddv = vec_ld(0, nqadd8); |
593 | 593 |
594 #if 0 // block *is* 16 bytes-aligned, it seems. | 594 #if 0 // block *is* 16 bytes-aligned, it seems. |
603 } | 603 } |
604 block[j] = level; | 604 block[j] = level; |
605 } | 605 } |
606 } | 606 } |
607 #endif | 607 #endif |
608 | 608 |
609 // vectorize all the 16 bytes-aligned blocks | 609 // vectorize all the 16 bytes-aligned blocks |
610 // of 8 elements | 610 // of 8 elements |
611 for(; (j + 7) <= nCoeffs ; j+=8) | 611 for(; (j + 7) <= nCoeffs ; j+=8) |
612 { | 612 { |
613 blockv = vec_ld(j << 1, block); | 613 blockv = vec_ld(j << 1, block); |
635 level = level * qmul + qadd; | 635 level = level * qmul + qadd; |
636 } | 636 } |
637 block[j] = level; | 637 block[j] = level; |
638 } | 638 } |
639 } | 639 } |
640 | 640 |
641 if (i == 1) | 641 if (i == 1) |
642 { // cheat. this avoid special-casing the first iteration | 642 { // cheat. this avoid special-casing the first iteration |
643 block[0] = backup_0; | 643 block[0] = backup_0; |
644 } | 644 } |
645 } | 645 } |