comparison ppc/gmc_altivec.c @ 10079:71ead14665e3 libavcodec

PPC: simplify loading some values into altivec registers. Instead of filling a local array with the desired value and loading it, load a single element and vec_splat() it to fill the vector.
author mru
date Mon, 24 Aug 2009 10:36:13 +0000
parents 7cee7292d5cc
children 34a65026fa06
comparison
equal deleted inserted replaced
10078:57f034d80624 10079:71ead14665e3
21 */ 21 */
22 22
23 #include "libavcodec/dsputil.h" 23 #include "libavcodec/dsputil.h"
24 #include "dsputil_ppc.h" 24 #include "dsputil_ppc.h"
25 #include "util_altivec.h" 25 #include "util_altivec.h"
26 #include "types_altivec.h"
26 27
27 /* 28 /*
28 altivec-enhanced gmc1. ATM this code assume stride is a multiple of 8, 29 altivec-enhanced gmc1. ATM this code assume stride is a multiple of 8,
29 to preserve proper dst alignment. 30 to preserve proper dst alignment.
30 */ 31 */
31 #define GMC1_PERF_COND (h==8) 32 #define GMC1_PERF_COND (h==8)
32 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder) 33 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
33 { 34 {
34 POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); 35 POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
35 const DECLARE_ALIGNED_16(unsigned short, rounder_a[8]) = 36 const DECLARE_ALIGNED_16(unsigned short, rounder_a) = rounder;
36 {rounder, rounder, rounder, rounder,
37 rounder, rounder, rounder, rounder};
38 const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) = 37 const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) =
39 { 38 {
40 (16-x16)*(16-y16), /* A */ 39 (16-x16)*(16-y16), /* A */
41 ( x16)*(16-y16), /* B */ 40 ( x16)*(16-y16), /* B */
42 (16-x16)*( y16), /* C */ 41 (16-x16)*( y16), /* C */
58 Av = vec_splat(tempA, 0); 57 Av = vec_splat(tempA, 0);
59 Bv = vec_splat(tempA, 1); 58 Bv = vec_splat(tempA, 1);
60 Cv = vec_splat(tempA, 2); 59 Cv = vec_splat(tempA, 2);
61 Dv = vec_splat(tempA, 3); 60 Dv = vec_splat(tempA, 3);
62 61
63 rounderV = vec_ld(0, (unsigned short*)rounder_a); 62 rounderV = vec_splat((vec_u16)vec_lde(0, &rounder_a), 0);
64 63
65 // we'll be able to pick-up our 9 char elements 64 // we'll be able to pick-up our 9 char elements
66 // at src from those 32 bytes 65 // at src from those 32 bytes
67 // we load the first batch here, as inside the loop 66 // we load the first batch here, as inside the loop
68 // we can re-use 'src+stride' from one iteration 67 // we can re-use 'src+stride' from one iteration