annotate ppc/vp3dsp_altivec.c @ 12483:0159a19bfff7 libavcodec

aacdec: Rework channel mapping compatibility hacks. For a PCE based configuration map the channels solely based on tags. For an indexed configuration map the channels solely based on position. This works with all known exotic samples including al17, elem_id0, bad_concat, and lfe_is_sce.
author alexc
date Fri, 10 Sep 2010 18:01:48 +0000
parents 50415a8f1451
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9711
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
1 /*
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
2 * Copyright (C) 2009 David Conrad
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
3 *
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
4 * This file is part of FFmpeg.
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
5 *
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
10 *
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
14 * Lesser General Public License for more details.
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
15 *
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
19 */
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
20
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
21 #include "libavcodec/dsputil.h"
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
22 #include "util_altivec.h"
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
23 #include "types_altivec.h"
11382
50415a8f1451 PPC: move prototypes to headers and make some functions static
mru
parents: 9711
diff changeset
24 #include "dsputil_altivec.h"
9711
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
25
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
/* Fixed-point multipliers: element k (k = 1..7) equals 65536*cos(k*pi/16)
 * rounded to an integer.  IDCT_START splats elements 1..7 into C1..C7;
 * element 0 is not referenced by the code in this file section.
 * Values above 32767 do not fit a signed 16-bit lane; the code relies on
 * only the low 16 bits being kept, and M16() compensates for the resulting
 * sign change. */
static const vec_s16 constants =
    {0, 64277, 60547, 54491, 46341, 36410, 25080, 12785};
/* vec_perm selector used by M15(): for each pair of output halfwords it takes
 * bytes 0-1 of a 32-bit element from the first operand (indices 0..15) and
 * then bytes 0-1 of the matching element from the second operand
 * (indices 16..31). */
static const vec_u8 interleave_high =
    {0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29};
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
30
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
/*
 * Declares the locals shared by the IDCT entry points and loads the input.
 *
 * Must be expanded inside a function that has a variable named `block`
 * in scope.  It introduces:
 *   - A..H, Ad..Dd, Ed, Gd, Add, Bdd, Fd, Hd: temporaries for IDCT_1D
 *   - eight, four: splatted operands used by ADD8 and SHIFT4
 *   - C1..C7: one multiplier from `constants` replicated across all lanes
 *   - b0..b7: eight vectors loaded from block at byte offsets 0x00..0x70
 *
 * NOTE(review): vec_ld ignores the low four address bits, so this assumes
 * block is 16-byte aligned -- confirm with callers.
 */
#define IDCT_START \
    vec_s16 A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;\
    vec_s16 Ed, Gd, Add, Bdd, Fd, Hd;\
    vec_s16 eight = vec_splat_s16(8);\
    vec_u16 four = vec_splat_u16(4);\
\
    vec_s16 C1 = vec_splat(constants, 1);\
    vec_s16 C2 = vec_splat(constants, 2);\
    vec_s16 C3 = vec_splat(constants, 3);\
    vec_s16 C4 = vec_splat(constants, 4);\
    vec_s16 C5 = vec_splat(constants, 5);\
    vec_s16 C6 = vec_splat(constants, 6);\
    vec_s16 C7 = vec_splat(constants, 7);\
\
    vec_s16 b0 = vec_ld(0x00, block);\
    vec_s16 b1 = vec_ld(0x10, block);\
    vec_s16 b2 = vec_ld(0x20, block);\
    vec_s16 b3 = vec_ld(0x30, block);\
    vec_s16 b4 = vec_ld(0x40, block);\
    vec_s16 b5 = vec_ld(0x50, block);\
    vec_s16 b6 = vec_ld(0x60, block);\
    vec_s16 b7 = vec_ld(0x70, block);
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
53
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
54 // these functions do (a*C)>>16
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
55 // things are tricky because a is signed, but C unsigned.
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
56 // M15 is used if C fits in 15 bit unsigned (C6,C7)
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
57 // M16 is used if C requires 16 bits unsigned
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
58 static inline vec_s16 M15(vec_s16 a, vec_s16 C)
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
59 {
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
60 return (vec_s16)vec_perm(vec_mule(a,C), vec_mulo(a,C), interleave_high);
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
61 }
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
62 static inline vec_s16 M16(vec_s16 a, vec_s16 C)
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
63 {
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
64 return vec_add(a, M15(a, C));
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
65 }
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
66
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
/*
 * One 1-D 8-point inverse DCT pass, performed independently in every lane.
 *
 * Reads b0..b7 and overwrites them with the transformed values; all inputs
 * are consumed (A..D, E..H) before the first output is assigned, so the
 * update is safe in place.  Requires the locals declared by IDCT_START.
 *
 *   ADD   - macro applied to the two terms built from b0 and b4 (E and F);
 *           used to inject the rounding bias
 *   SHIFT - macro applied to each of the eight outputs; used for the final
 *           scaling
 *
 * Odd inputs (b1, b3, b5, b7) feed A..D, even inputs (b0, b2, b4, b6) feed
 * E..H.  Outputs are produced as sum/difference pairs:
 * (b0,b7), (b1,b2), (b3,b4), (b5,b6).
 */
#define IDCT_1D(ADD, SHIFT)\
    A = vec_add(M16(b1, C1), M15(b7, C7));\
    B = vec_sub(M15(b1, C7), M16(b7, C1));\
    C = vec_add(M16(b3, C3), M16(b5, C5));\
    D = vec_sub(M16(b5, C3), M16(b3, C5));\
\
    Ad = M16(vec_sub(A, C), C4);\
    Bd = M16(vec_sub(B, D), C4);\
\
    Cd = vec_add(A, C);\
    Dd = vec_add(B, D);\
\
    E = ADD(M16(vec_add(b0, b4), C4));\
    F = ADD(M16(vec_sub(b0, b4), C4));\
\
    G = vec_add(M16(b2, C2), M15(b6, C6));\
    H = vec_sub(M15(b2, C6), M16(b6, C2));\
\
    Ed = vec_sub(E, G);\
    Gd = vec_add(E, G);\
\
    Add = vec_add(F, Ad);\
    Bdd = vec_sub(Bd, H);\
\
    Fd = vec_sub(F, Ad);\
    Hd = vec_add(Bd, H);\
\
    b0 = SHIFT(vec_add(Gd, Cd));\
    b7 = SHIFT(vec_sub(Gd, Cd));\
\
    b1 = SHIFT(vec_add(Add, Hd));\
    b2 = SHIFT(vec_sub(Add, Hd));\
\
    b3 = SHIFT(vec_add(Ed, Dd));\
    b4 = SHIFT(vec_sub(Ed, Dd));\
\
    b5 = SHIFT(vec_add(Fd, Bdd));\
    b6 = SHIFT(vec_sub(Fd, Bdd));
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
105
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
/* Operations passed to IDCT_1D as its ADD / SHIFT arguments. */
/* NOP: leave the value unchanged (used for both slots in the first pass). */
#define NOP(a) a
/* ADD8: add the local vector `eight` (declared by IDCT_START; a caller may
 * have adjusted it before the second pass). */
#define ADD8(a) vec_add(a, eight)
/* SHIFT4: arithmetic right shift by the local vector `four`. */
#define SHIFT4(a) vec_sra(a, four)
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
109
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
/*
 * In-place 8x8 inverse DCT.
 *
 * block: 64 coefficients, read as eight vectors and overwritten with the
 *        result.
 *
 * NOTE(review): only one transpose is performed between the two passes, so
 * the stored result is transposed relative to the vectors that were loaded;
 * presumably the caller supplies the coefficients in transposed order --
 * confirm against the IDCT permutation selected for this implementation.
 */
void ff_vp3_idct_altivec(DCTELEM block[64])
{
    IDCT_START

    // first 1-D pass: no rounding bias, no scaling
    IDCT_1D(NOP, NOP)
    // swap rows and columns so the second pass runs along the other dimension
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    // second 1-D pass: bias of 8 on the E/F terms, outputs shifted right by 4
    IDCT_1D(ADD8, SHIFT4)

    // write the eight result vectors back over the input coefficients
    vec_st(b0, 0x00, block);
    vec_st(b1, 0x10, block);
    vec_st(b2, 0x20, block);
    vec_st(b3, 0x30, block);
    vec_st(b4, 0x40, block);
    vec_st(b5, 0x50, block);
    vec_st(b6, 0x60, block);
    vec_st(b7, 0x70, block);
}
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
127
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
/*
 * 8x8 inverse DCT whose result is written to dst as unsigned 8-bit pixels.
 *
 * dst:    first of eight output rows, 8 bytes are written per row
 * stride: byte distance between consecutive rows of dst
 * block:  64 input coefficients (not modified by this function)
 */
void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
{
    vec_u8 t;
    IDCT_START

    // The transform output is signed; fold an extra 128*16 (= 1<<11) into the
    // rounding constant so that, after the final >>4, every sample carries a
    // +128 offset on top of the usual rounding bias of 8.
    vec_s16 v2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11));
    eight = vec_add(eight, v2048);

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

    // PUT(a): saturate the eight 16-bit samples of `a` to unsigned bytes.
    // Packing `a` with itself leaves the same 8 bytes in both halves of t.
    // The row is then written as two 32-bit element stores, at dst and dst+4.
    // NOTE(review): vec_ste stores the element that corresponds to the
    // address's position inside its 16-byte line, so this appears to rely on
    // dst being 8-byte aligned -- confirm with callers.
#define PUT(a)\
    t = vec_packsu(a, a);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    // one call per output row, advancing dst between rows
    PUT(b0)     dst += stride;
    PUT(b1)     dst += stride;
    PUT(b2)     dst += stride;
    PUT(b3)     dst += stride;
    PUT(b4)     dst += stride;
    PUT(b5)     dst += stride;
    PUT(b6)     dst += stride;
    PUT(b7)
}
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
155
d563821462b4 Altivec VP3 IDCT
conrad
parents:
diff changeset
/*
 * 8x8 inverse DCT whose result is added to the pixels already in dst.
 *
 * dst:    first of eight rows of unsigned 8-bit pixels, updated in place
 *         (8 bytes per row)
 * stride: byte distance between consecutive rows of dst
 * block:  64 input coefficients (not modified by this function)
 */
void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
{
    LOAD_ZERO;
    vec_u8 t, vdst;
    vec_s16 vdst_16;
    // vec_perm selector that widens 8 pixels to 16 bits: every even byte is
    // 0xFF (as a selector: the last byte of the second operand), every odd
    // byte is the next index from vec_lvsl, i.e. the next pixel starting at
    // dst's offset inside its 16-byte line.
    // NOTE(review): the mask is derived from the initial dst only, so later
    // rows presumably need the same offset within a 16-byte line (stride a
    // multiple of 16) -- confirm with callers.
    vec_u8 vdst_mask = vec_mergeh(vec_splat_u8(-1), vec_lvsl(0, dst));

    IDCT_START

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

    // ADD(a): load the 16-byte line containing dst, widen its 8 pixels using
    // vdst_mask (zero_u8v is presumably the zero vector set up by LOAD_ZERO),
    // add the residual `a` with signed saturation, saturate back to unsigned
    // bytes and write the row as two 32-bit element stores at dst and dst+4.
#define ADD(a)\
    vdst = vec_ld(0, dst);\
    vdst_16 = (vec_s16)vec_perm(vdst, zero_u8v, vdst_mask);\
    vdst_16 = vec_adds(a, vdst_16);\
    t = vec_packsu(vdst_16, vdst_16);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    // one call per row, advancing dst between rows
    ADD(b0)     dst += stride;
    ADD(b1)     dst += stride;
    ADD(b2)     dst += stride;
    ADD(b3)     dst += stride;
    ADD(b4)     dst += stride;
    ADD(b5)     dst += stride;
    ADD(b6)     dst += stride;
    ADD(b7)
}