Mercurial > libavcodec.hg
comparison ppc/gmc_altivec.c @ 10079:71ead14665e3 libavcodec
PPC: simplify loading some values into altivec registers
Instead of filling a local array with the desired value and loading it,
load a single element and vec_splat() it to fill the vector.
author | mru |
---|---|
date | Mon, 24 Aug 2009 10:36:13 +0000 |
parents | 7cee7292d5cc |
children | 34a65026fa06 |
comparison
equal
deleted
inserted
replaced
10078:57f034d80624 | 10079:71ead14665e3 |
---|---|
21 */ | 21 */ |
22 | 22 |
23 #include "libavcodec/dsputil.h" | 23 #include "libavcodec/dsputil.h" |
24 #include "dsputil_ppc.h" | 24 #include "dsputil_ppc.h" |
25 #include "util_altivec.h" | 25 #include "util_altivec.h" |
26 #include "types_altivec.h" | |
26 | 27 |
27 /* | 28 /* |
28 altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8, | 29 altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8, |
29 to preserve proper dst alignment. | 30 to preserve proper dst alignment. |
30 */ | 31 */ |
31 #define GMC1_PERF_COND (h==8) | 32 #define GMC1_PERF_COND (h==8) |
32 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder) | 33 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder) |
33 { | 34 { |
34 POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); | 35 POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); |
35 const DECLARE_ALIGNED_16(unsigned short, rounder_a[8]) = | 36 const DECLARE_ALIGNED_16(unsigned short, rounder_a) = rounder; |
36 {rounder, rounder, rounder, rounder, | |
37 rounder, rounder, rounder, rounder}; | |
38 const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) = | 37 const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) = |
39 { | 38 { |
40 (16-x16)*(16-y16), /* A */ | 39 (16-x16)*(16-y16), /* A */ |
41 ( x16)*(16-y16), /* B */ | 40 ( x16)*(16-y16), /* B */ |
42 (16-x16)*( y16), /* C */ | 41 (16-x16)*( y16), /* C */ |
58 Av = vec_splat(tempA, 0); | 57 Av = vec_splat(tempA, 0); |
59 Bv = vec_splat(tempA, 1); | 58 Bv = vec_splat(tempA, 1); |
60 Cv = vec_splat(tempA, 2); | 59 Cv = vec_splat(tempA, 2); |
61 Dv = vec_splat(tempA, 3); | 60 Dv = vec_splat(tempA, 3); |
62 | 61 |
63 rounderV = vec_ld(0, (unsigned short*)rounder_a); | 62 rounderV = vec_splat((vec_u16)vec_lde(0, &rounder_a), 0); |
64 | 63 |
65 // we'll be able to pick up our 9 char elements | 64 // we'll be able to pick up our 9 char elements |
66 // at src from those 32 bytes | 65 // at src from those 32 bytes |
67 // we load the first batch here, as inside the loop | 66 // we load the first batch here, as inside the loop |
68 // we can re-use 'src+stride' from one iteration | 67 // we can re-use 'src+stride' from one iteration |