Mercurial > libavcodec.hg
annotate ppc/vc1dsp_altivec.c @ 6323:e6da66f378c7 libavcodec
mpegvideo.h has two function declarations with the 'inline' specifier
but no definition for those functions. The C standard requires a
definition to appear in the same translation unit for any function
declared with 'inline'. Most of the files including mpegvideo.h do not
define those functions. Fix this by removing the 'inline' specifiers
from the header.
patch by Uoti Urpala
author | diego |
---|---|
date | Sun, 03 Feb 2008 17:54:30 +0000 |
parents | 1ba8ee13e5b9 |
children | f7cbb7733146 |
rev | line source |
---|---|
3537 | 1 /* |
2 * VC-1 and WMV3 decoder - DSP functions AltiVec-optimized | |
3 * Copyright (c) 2006 Konstantin Shishkov | |
4 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3537
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3537
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3537
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
3537 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3537
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
3537 | 11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3537
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
3537 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3537
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3537 | 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 */ | |
21 | |
5010
d5ba514e3f4a
Add libavcodec to compiler include flags in order to simplify header
diego
parents:
3983
diff
changeset
|
22 #include "dsputil.h" |
3537 | 23 |
24 #include "gcc_fixes.h" | |
25 | |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
5215
diff
changeset
|
26 #include "util_altivec.h" |
3537 | 27 |
/*
 * Main steps of the 8-point inverse transform (one 1-D pass).
 *
 * s0..s7 are read as the eight inputs and overwritten with the eight
 * outputs; vec_rnd is added to both even-part sums before the butterflies.
 * The macro relies on temporaries t0..t7 and on the shift-count vectors
 * vec_1, vec_2, vec_3 and vec_4 being declared in the enclosing scope.
 *
 * Multiplications are built from shifts and adds.  Effective weights:
 *   even part:  12 * (s0 +/- s4) + vec_rnd,
 *               16 * s2 + 6 * s6   and   6 * s2 - 16 * s6
 *   odd part:   t0 = 16*s1 + 15*s3 +  9*s5 +  4*s7
 *               t1 = 15*s1 -  4*s3 - 16*s5 -  9*s7
 *               t2 =  9*s1 - 16*s3 +  4*s5 + 15*s7
 *               t3 =  4*s1 -  9*s3 + 15*s5 - 16*s7
 *
 * Every argument is expanded several times, so pass plain variables only.
 */
#define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \
do { \
    /* even part: 12 * (s0 + s4) + rnd and 12 * (s0 - s4) + rnd */ \
    t0 = vec_sl(vec_add(s0, s4), vec_2); \
    t0 = vec_add(vec_sl(t0, vec_1), t0); \
    t0 = vec_add(t0, vec_rnd); \
    t1 = vec_sl(vec_sub(s0, s4), vec_2); \
    t1 = vec_add(vec_sl(t1, vec_1), t1); \
    t1 = vec_add(t1, vec_rnd); \
    /* even part: 6 * s6 + 16 * s2 and 6 * s2 - 16 * s6 */ \
    t2 = vec_add(vec_sl(s6, vec_2), vec_sl(s6, vec_1)); \
    t2 = vec_add(t2, vec_sl(s2, vec_4)); \
    t3 = vec_add(vec_sl(s2, vec_2), vec_sl(s2, vec_1)); \
    t3 = vec_sub(t3, vec_sl(s6, vec_4)); \
    t4 = vec_add(t0, t2); \
    t5 = vec_add(t1, t3); \
    t6 = vec_sub(t1, t3); \
    t7 = vec_sub(t0, t2); \
\
    /* odd part: t0..t3 are reused for the four odd sums */ \
    t0 = vec_sl(vec_add(s1, s3), vec_4); \
    t0 = vec_add(t0, vec_sl(s5, vec_3)); \
    t0 = vec_add(t0, vec_sl(s7, vec_2)); \
    t0 = vec_add(t0, vec_sub(s5, s3)); \
\
    t1 = vec_sl(vec_sub(s1, s5), vec_4); \
    t1 = vec_sub(t1, vec_sl(s7, vec_3)); \
    t1 = vec_sub(t1, vec_sl(s3, vec_2)); \
    t1 = vec_sub(t1, vec_add(s1, s7)); \
\
    t2 = vec_sl(vec_sub(s7, s3), vec_4); \
    t2 = vec_add(t2, vec_sl(s1, vec_3)); \
    t2 = vec_add(t2, vec_sl(s5, vec_2)); \
    t2 = vec_add(t2, vec_sub(s1, s7)); \
\
    t3 = vec_sl(vec_sub(s5, s7), vec_4); \
    t3 = vec_sub(t3, vec_sl(s3, vec_3)); \
    t3 = vec_add(t3, vec_sl(s1, vec_2)); \
    t3 = vec_sub(t3, vec_add(s3, s5)); \
\
    /* final butterflies: even +/- odd */ \
    s0 = vec_add(t4, t0); \
    s1 = vec_add(t5, t1); \
    s2 = vec_add(t6, t2); \
    s3 = vec_add(t7, t3); \
    s4 = vec_sub(t7, t3); \
    s5 = vec_sub(t6, t2); \
    s6 = vec_sub(t5, t1); \
    s7 = vec_sub(t4, t0); \
} while (0)
75 | |
/*
 * Scale the eight results of the first (horizontal) 8-point pass:
 * arithmetic right shift of each operand by 3 bits.
 * Requires the shift-count vector vec_3 in the enclosing scope.
 */
#define SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7) \
do { \
    s0 = vec_sra(s0, vec_3); \
    s1 = vec_sra(s1, vec_3); \
    s2 = vec_sra(s2, vec_3); \
    s3 = vec_sra(s3, vec_3); \
    s4 = vec_sra(s4, vec_3); \
    s5 = vec_sra(s5, vec_3); \
    s6 = vec_sra(s6, vec_3); \
    s7 = vec_sra(s7, vec_3); \
} while (0)
87 | |
/*
 * Scale the eight results of the second (vertical) 8-point pass:
 * arithmetic right shift by 7 bits.  The last four operands (s4..s7)
 * additionally get +1 added before the shift; the first four do not.
 * Requires vec_7 (shift count) and vec_1s (signed ones) in the
 * enclosing scope.
 */
#define SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7) \
do { \
    s0 = vec_sra(s0, vec_7); \
    s1 = vec_sra(s1, vec_7); \
    s2 = vec_sra(s2, vec_7); \
    s3 = vec_sra(s3, vec_7); \
    s4 = vec_sra(vec_add(s4, vec_1s), vec_7); \
    s5 = vec_sra(vec_add(s5, vec_1s), vec_7); \
    s6 = vec_sra(vec_add(s6, vec_1s), vec_7); \
    s7 = vec_sra(vec_add(s7, vec_1s), vec_7); \
} while (0)
99 | |
/*
 * Main steps of the 4-point inverse transform (one 1-D pass).
 *
 * s0..s3 are read as inputs and overwritten with the outputs; vec_rnd is
 * added to the 17 * s0 term.  Relies on temporaries t0..t3 and on the
 * shift-count vectors vec_1 .. vec_5 declared in the enclosing scope.
 *
 * Effective weights (built from shifts and adds):
 *   s0' = 17*s0 + rnd + 17*s2 + (22*s1 + 10*s3)
 *   s1' = 17*s0 + rnd - 17*s2 - (22*s3 - 10*s1)
 *   s2' = 17*s0 + rnd - 17*s2 + (22*s3 - 10*s1)
 *   s3' = 17*s0 + rnd + 17*s2 - (22*s1 + 10*s3)
 *
 * Every argument is expanded several times, so pass plain variables only.
 */
#define STEP4(s0, s1, s2, s3, vec_rnd) \
do { \
    t1 = vec_add(vec_sl(s0, vec_4), s0); \
    t1 = vec_add(t1, vec_rnd); \
    t2 = vec_add(vec_sl(s2, vec_4), s2); \
    t0 = vec_add(t1, t2); \
    t1 = vec_sub(t1, t2); \
    /* t3 = 10 * (s3 - s1), shared by both odd sums */ \
    t3 = vec_sl(vec_sub(s3, s1), vec_1); \
    t3 = vec_add(t3, vec_sl(t3, vec_2)); \
    t2 = vec_add(t3, vec_sl(s1, vec_5)); \
    t3 = vec_add(t3, vec_sl(s3, vec_3)); \
    t3 = vec_add(t3, vec_sl(s3, vec_2)); \
    s0 = vec_add(t0, t2); \
    s1 = vec_sub(t1, t3); \
    s2 = vec_add(t1, t3); \
    s3 = vec_sub(t0, t2); \
} while (0)
118 | |
/*
 * Scale four results of a horizontal pass: arithmetic right shift of each
 * operand by 3 bits.  Requires the shift-count vector vec_3 in the
 * enclosing scope.  Wrapped in do { } while (0) so that it behaves as a
 * single statement, like SHIFT_HOR8.
 */
#define SHIFT_HOR4(s0, s1, s2, s3) \
do { \
    s0 = vec_sra(s0, vec_3); \
    s1 = vec_sra(s1, vec_3); \
    s2 = vec_sra(s2, vec_3); \
    s3 = vec_sra(s3, vec_3); \
} while (0)
124 | |
/*
 * Scale four results of a vertical pass: arithmetic right shift of each
 * operand by 7 bits.  Requires the shift-count vector vec_7 in the
 * enclosing scope.  Wrapped in do { } while (0) so that it behaves as a
 * single statement, like SHIFT_VERT8.
 */
#define SHIFT_VERT4(s0, s1, s2, s3) \
do { \
    s0 = vec_sra(s0, vec_7); \
    s1 = vec_sra(s1, vec_7); \
    s2 = vec_sra(s2, vec_7); \
    s3 = vec_sra(s3, vec_7); \
} while (0)
130 | |
131 /** Do inverse transform on 8x8 block | |
132 */ | |
133 static void vc1_inv_trans_8x8_altivec(DCTELEM block[64]) | |
134 { | |
135 vector signed short src0, src1, src2, src3, src4, src5, src6, src7; | |
136 vector signed int s0, s1, s2, s3, s4, s5, s6, s7; | |
137 vector signed int s8, s9, sA, sB, sC, sD, sE, sF; | |
138 vector signed int t0, t1, t2, t3, t4, t5, t6, t7; | |
139 const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4)); | |
140 const vector unsigned int vec_7 = vec_splat_u32(7); | |
141 const vector unsigned int vec_4 = vec_splat_u32(4); | |
142 const vector signed int vec_4s = vec_splat_s32(4); | |
143 const vector unsigned int vec_3 = vec_splat_u32(3); | |
144 const vector unsigned int vec_2 = vec_splat_u32(2); | |
145 const vector signed int vec_1s = vec_splat_s32(1); | |
146 const vector unsigned int vec_1 = vec_splat_u32(1); | |
147 | |
148 | |
149 src0 = vec_ld( 0, block); | |
150 src1 = vec_ld( 16, block); | |
151 src2 = vec_ld( 32, block); | |
152 src3 = vec_ld( 48, block); | |
153 src4 = vec_ld( 64, block); | |
154 src5 = vec_ld( 80, block); | |
155 src6 = vec_ld( 96, block); | |
156 src7 = vec_ld(112, block); | |
157 | |
158 TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); | |
159 s0 = vec_unpackl(src0); | |
160 s1 = vec_unpackl(src1); | |
161 s2 = vec_unpackl(src2); | |
162 s3 = vec_unpackl(src3); | |
163 s4 = vec_unpackl(src4); | |
164 s5 = vec_unpackl(src5); | |
165 s6 = vec_unpackl(src6); | |
166 s7 = vec_unpackl(src7); | |
167 s8 = vec_unpackh(src0); | |
168 s9 = vec_unpackh(src1); | |
169 sA = vec_unpackh(src2); | |
170 sB = vec_unpackh(src3); | |
171 sC = vec_unpackh(src4); | |
172 sD = vec_unpackh(src5); | |
173 sE = vec_unpackh(src6); | |
174 sF = vec_unpackh(src7); | |
175 STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s); | |
176 SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7); | |
177 STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s); | |
178 SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF); | |
179 src0 = vec_pack(s8, s0); | |
180 src1 = vec_pack(s9, s1); | |
181 src2 = vec_pack(sA, s2); | |
182 src3 = vec_pack(sB, s3); | |
183 src4 = vec_pack(sC, s4); | |
184 src5 = vec_pack(sD, s5); | |
185 src6 = vec_pack(sE, s6); | |
186 src7 = vec_pack(sF, s7); | |
187 TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); | |
188 | |
189 s0 = vec_unpackl(src0); | |
190 s1 = vec_unpackl(src1); | |
191 s2 = vec_unpackl(src2); | |
192 s3 = vec_unpackl(src3); | |
193 s4 = vec_unpackl(src4); | |
194 s5 = vec_unpackl(src5); | |
195 s6 = vec_unpackl(src6); | |
196 s7 = vec_unpackl(src7); | |
197 s8 = vec_unpackh(src0); | |
198 s9 = vec_unpackh(src1); | |
199 sA = vec_unpackh(src2); | |
200 sB = vec_unpackh(src3); | |
201 sC = vec_unpackh(src4); | |
202 sD = vec_unpackh(src5); | |
203 sE = vec_unpackh(src6); | |
204 sF = vec_unpackh(src7); | |
205 STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_64); | |
206 SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7); | |
207 STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_64); | |
208 SHIFT_VERT8(s8, s9, sA, sB, sC, sD, sE, sF); | |
209 src0 = vec_pack(s8, s0); | |
210 src1 = vec_pack(s9, s1); | |
211 src2 = vec_pack(sA, s2); | |
212 src3 = vec_pack(sB, s3); | |
213 src4 = vec_pack(sC, s4); | |
214 src5 = vec_pack(sD, s5); | |
215 src6 = vec_pack(sE, s6); | |
216 src7 = vec_pack(sF, s7); | |
217 | |
218 vec_st(src0, 0, block); | |
219 vec_st(src1, 16, block); | |
220 vec_st(src2, 32, block); | |
221 vec_st(src3, 48, block); | |
222 vec_st(src4, 64, block); | |
223 vec_st(src5, 80, block); | |
224 vec_st(src6, 96, block); | |
225 vec_st(src7,112, block); | |
226 } | |
227 | |
228 /** Do inverse transform on 8x4 part of block | |
229 */ | |
5999 | 230 static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, DCTELEM *block) |
3537 | 231 { |
232 vector signed short src0, src1, src2, src3, src4, src5, src6, src7; | |
233 vector signed int s0, s1, s2, s3, s4, s5, s6, s7; | |
234 vector signed int s8, s9, sA, sB, sC, sD, sE, sF; | |
235 vector signed int t0, t1, t2, t3, t4, t5, t6, t7; | |
236 const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4)); | |
237 const vector unsigned int vec_7 = vec_splat_u32(7); | |
238 const vector unsigned int vec_5 = vec_splat_u32(5); | |
239 const vector unsigned int vec_4 = vec_splat_u32(4); | |
240 const vector signed int vec_4s = vec_splat_s32(4); | |
241 const vector unsigned int vec_3 = vec_splat_u32(3); | |
242 const vector unsigned int vec_2 = vec_splat_u32(2); | |
243 const vector unsigned int vec_1 = vec_splat_u32(1); | |
5999 | 244 vector unsigned char tmp; |
245 vector signed short tmp2, tmp3; | |
246 vector unsigned char perm0, perm1, p0, p1, p; | |
3537 | 247 |
248 src0 = vec_ld( 0, block); | |
249 src1 = vec_ld( 16, block); | |
250 src2 = vec_ld( 32, block); | |
251 src3 = vec_ld( 48, block); | |
252 src4 = vec_ld( 64, block); | |
253 src5 = vec_ld( 80, block); | |
254 src6 = vec_ld( 96, block); | |
255 src7 = vec_ld(112, block); | |
256 | |
257 TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); | |
258 s0 = vec_unpackl(src0); | |
259 s1 = vec_unpackl(src1); | |
260 s2 = vec_unpackl(src2); | |
261 s3 = vec_unpackl(src3); | |
262 s4 = vec_unpackl(src4); | |
263 s5 = vec_unpackl(src5); | |
264 s6 = vec_unpackl(src6); | |
265 s7 = vec_unpackl(src7); | |
266 s8 = vec_unpackh(src0); | |
267 s9 = vec_unpackh(src1); | |
268 sA = vec_unpackh(src2); | |
269 sB = vec_unpackh(src3); | |
270 sC = vec_unpackh(src4); | |
271 sD = vec_unpackh(src5); | |
272 sE = vec_unpackh(src6); | |
273 sF = vec_unpackh(src7); | |
274 STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s); | |
275 SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7); | |
276 STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s); | |
277 SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF); | |
278 src0 = vec_pack(s8, s0); | |
279 src1 = vec_pack(s9, s1); | |
280 src2 = vec_pack(sA, s2); | |
281 src3 = vec_pack(sB, s3); | |
282 src4 = vec_pack(sC, s4); | |
283 src5 = vec_pack(sD, s5); | |
284 src6 = vec_pack(sE, s6); | |
285 src7 = vec_pack(sF, s7); | |
286 TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); | |
287 | |
6000 | 288 s0 = vec_unpackh(src0); |
289 s1 = vec_unpackh(src1); | |
290 s2 = vec_unpackh(src2); | |
291 s3 = vec_unpackh(src3); | |
292 s8 = vec_unpackl(src0); | |
293 s9 = vec_unpackl(src1); | |
294 sA = vec_unpackl(src2); | |
295 sB = vec_unpackl(src3); | |
296 STEP4(s0, s1, s2, s3, vec_64); | |
297 SHIFT_VERT4(s0, s1, s2, s3); | |
298 STEP4(s8, s9, sA, sB, vec_64); | |
299 SHIFT_VERT4(s8, s9, sA, sB); | |
300 src0 = vec_pack(s0, s8); | |
301 src1 = vec_pack(s1, s9); | |
302 src2 = vec_pack(s2, sA); | |
303 src3 = vec_pack(s3, sB); | |
3537 | 304 |
5999 | 305 p0 = vec_lvsl (0, dest); |
306 p1 = vec_lvsl (stride, dest); | |
307 p = vec_splat_u8 (-1); | |
308 perm0 = vec_mergeh (p, p0); | |
309 perm1 = vec_mergeh (p, p1); | |
3537 | 310 |
5999 | 311 #define ADD(dest,src,perm) \ |
312 /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \ | |
313 tmp = vec_ld (0, dest); \ | |
6028
1ba8ee13e5b9
Make strict altivec parsers happy (gcc-4.3 and others)
lu_zero
parents:
6000
diff
changeset
|
314 tmp2 = (vector signed short)vec_perm (tmp, vec_splat_u8(0), perm); \ |
5999 | 315 tmp3 = vec_adds (tmp2, src); \ |
316 tmp = vec_packsu (tmp3, tmp3); \ | |
317 vec_ste ((vector unsigned int)tmp, 0, (unsigned int *)dest); \ | |
318 vec_ste ((vector unsigned int)tmp, 4, (unsigned int *)dest); | |
319 | |
320 ADD (dest, src0, perm0) dest += stride; | |
321 ADD (dest, src1, perm1) dest += stride; | |
322 ADD (dest, src2, perm0) dest += stride; | |
323 ADD (dest, src3, perm1) | |
3537 | 324 } |
325 | |
326 | |
327 void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) { | |
328 dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec; | |
5999 | 329 dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec; |
3537 | 330 } |