comparison ppc/mpegvideo_altivec.c @ 10079:71ead14665e3 libavcodec

PPC: simplify loading some values into altivec registers. Instead of filling a local array with the desired value and loading it, load a single element and vec_splat() it to fill the vector.
author mru
date Mon, 24 Aug 2009 10:36:13 +0000
parents dd2b5e52336a
children 98970e51365a
comparison
equal deleted inserted replaced
10078:57f034d80624 10079:71ead14665e3
26 #include "libavcodec/dsputil.h" 26 #include "libavcodec/dsputil.h"
27 #include "libavcodec/mpegvideo.h" 27 #include "libavcodec/mpegvideo.h"
28 28
29 #include "dsputil_ppc.h" 29 #include "dsputil_ppc.h"
30 #include "util_altivec.h" 30 #include "util_altivec.h"
31 #include "types_altivec.h"
32
31 // Swaps two variables (used for altivec registers) 33 // Swaps two variables (used for altivec registers)
32 #define SWAP(a,b) \ 34 #define SWAP(a,b) \
33 do { \ 35 do { \
34 __typeof__(a) swap_temp=a; \ 36 __typeof__(a) swap_temp=a; \
35 a=b; \ 37 a=b; \
502 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; 504 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
503 } 505 }
504 506
505 { 507 {
506 register const vector signed short vczero = (const vector signed short)vec_splat_s16(0); 508 register const vector signed short vczero = (const vector signed short)vec_splat_s16(0);
507 DECLARE_ALIGNED_16(short, qmul8[]) = 509 DECLARE_ALIGNED_16(short, qmul8) = qmul;
508 { 510 DECLARE_ALIGNED_16(short, qadd8) = qadd;
509 qmul, qmul, qmul, qmul,
510 qmul, qmul, qmul, qmul
511 };
512 DECLARE_ALIGNED_16(short, qadd8[]) =
513 {
514 qadd, qadd, qadd, qadd,
515 qadd, qadd, qadd, qadd
516 };
517 DECLARE_ALIGNED_16(short, nqadd8[]) =
518 {
519 -qadd, -qadd, -qadd, -qadd,
520 -qadd, -qadd, -qadd, -qadd
521 };
522 register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; 511 register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
523 register vector bool short blockv_null, blockv_neg; 512 register vector bool short blockv_null, blockv_neg;
524 register short backup_0 = block[0]; 513 register short backup_0 = block[0];
525 register int j = 0; 514 register int j = 0;
526 515
527 qmulv = vec_ld(0, qmul8); 516 qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0);
528 qaddv = vec_ld(0, qadd8); 517 qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0);
529 nqaddv = vec_ld(0, nqadd8); 518 nqaddv = vec_sub(vczero, qaddv);
530 519
531 #if 0 // block *is* 16 bytes-aligned, it seems. 520 #if 0 // block *is* 16 bytes-aligned, it seems.
532 // first make sure block[j] is 16 bytes-aligned 521 // first make sure block[j] is 16 bytes-aligned
533 for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) { 522 for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) {
534 level = block[j]; 523 level = block[j];