Mercurial > libavcodec.hg
annotate ppc/util_altivec.h @ 9859:7a116de63777 libavcodec
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
Includes mmx2 asm for the various functions.
Note that the actual idct still does not have an x86 SIMD implemtation.
For wmv3 files using regular idct, the decoder just falls back to simple_idct,
since simple_idct_dc doesn't exist (yet).
author | darkshikari |
---|---|
date | Tue, 16 Jun 2009 09:00:55 +0000 |
parents | e9d9d946f213 |
children | 7dd2a45249a9 |
rev | line source |
---|---|
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
1 /* |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
2 * This file is part of FFmpeg. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
3 * |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
4 * FFmpeg is free software; you can redistribute it and/or |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
5 * modify it under the terms of the GNU Lesser General Public |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
6 * License as published by the Free Software Foundation; either |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
7 * version 2.1 of the License, or (at your option) any later version. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
8 * |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
9 * FFmpeg is distributed in the hope that it will be useful, |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
12 * Lesser General Public License for more details. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
13 * |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
14 * You should have received a copy of the GNU Lesser General Public |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
15 * License along with FFmpeg; if not, write to the Free Software |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
17 */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
18 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
19 /** |
8718
e9d9d946f213
Use full internal pathname in doxygen @file directives.
diego
parents:
8590
diff
changeset
|
20 * @file libavcodec/ppc/util_altivec.h |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
21 * Contains misc utility macros and inline functions |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
22 */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
23 |
7760 | 24 #ifndef AVCODEC_PPC_UTIL_ALTIVEC_H |
25 #define AVCODEC_PPC_UTIL_ALTIVEC_H | |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
26 |
6078 | 27 #include <stdint.h> |
28 | |
29 #include "config.h" | |
30 | |
8590 | 31 #if HAVE_ALTIVEC_H |
6078 | 32 #include <altivec.h> |
33 #endif | |
34 | |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
35 // used to build registers permutation vectors (vcprm) |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
36 // the 's' are for words in the _s_econd vector |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
37 #define WORD_0 0x00,0x01,0x02,0x03 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
38 #define WORD_1 0x04,0x05,0x06,0x07 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
39 #define WORD_2 0x08,0x09,0x0a,0x0b |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
40 #define WORD_3 0x0c,0x0d,0x0e,0x0f |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
41 #define WORD_s0 0x10,0x11,0x12,0x13 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
42 #define WORD_s1 0x14,0x15,0x16,0x17 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
43 #define WORD_s2 0x18,0x19,0x1a,0x1b |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
44 #define WORD_s3 0x1c,0x1d,0x1e,0x1f |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
45 |
7373
266d4949aa15
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
6368
diff
changeset
|
46 #define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} |
266d4949aa15
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
6368
diff
changeset
|
47 #define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
48 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
49 // vcprmle is used to keep the same index as in the SSE version. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
50 // it's the same as vcprm, with the index inversed |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
51 // ('le' is Little Endian) |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
52 #define vcprmle(a,b,c,d) vcprm(d,c,b,a) |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
53 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
54 // used to build inverse/identity vectors (vcii) |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
55 // n is _n_egative, p is _p_ositive |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
56 #define FLOAT_n -1. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
57 #define FLOAT_p 1. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
58 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
59 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
60 // Transpose 8x8 matrix of 16-bit elements (in-place) |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
61 #define TRANSPOSE8(a,b,c,d,e,f,g,h) \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
62 do { \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
63 vector signed short A1, B1, C1, D1, E1, F1, G1, H1; \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
64 vector signed short A2, B2, C2, D2, E2, F2, G2, H2; \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
65 \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
66 A1 = vec_mergeh (a, e); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
67 B1 = vec_mergel (a, e); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
68 C1 = vec_mergeh (b, f); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
69 D1 = vec_mergel (b, f); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
70 E1 = vec_mergeh (c, g); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
71 F1 = vec_mergel (c, g); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
72 G1 = vec_mergeh (d, h); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
73 H1 = vec_mergel (d, h); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
74 \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
75 A2 = vec_mergeh (A1, E1); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
76 B2 = vec_mergel (A1, E1); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
77 C2 = vec_mergeh (B1, F1); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
78 D2 = vec_mergel (B1, F1); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
79 E2 = vec_mergeh (C1, G1); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
80 F2 = vec_mergel (C1, G1); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
81 G2 = vec_mergeh (D1, H1); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
82 H2 = vec_mergel (D1, H1); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
83 \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
84 a = vec_mergeh (A2, E2); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
85 b = vec_mergel (A2, E2); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
86 c = vec_mergeh (B2, F2); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
87 d = vec_mergel (B2, F2); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
88 e = vec_mergeh (C2, G2); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
89 f = vec_mergel (C2, G2); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
90 g = vec_mergeh (D2, H2); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
91 h = vec_mergel (D2, H2); \ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
92 } while (0) |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
93 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
94 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
95 /** \brief loads unaligned vector \a *src with offset \a offset |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
96 and returns it */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
97 static inline vector unsigned char unaligned_load(int offset, uint8_t *src) |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
98 { |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
99 register vector unsigned char first = vec_ld(offset, src); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
100 register vector unsigned char second = vec_ld(offset+15, src); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
101 register vector unsigned char mask = vec_lvsl(offset, src); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
102 return vec_perm(first, second, mask); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
103 } |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
104 |
7760 | 105 #endif /* AVCODEC_PPC_UTIL_ALTIVEC_H */ |