Mercurial > libavcodec.hg
annotate ppc/float_altivec.c @ 7891:13ee9bb85721 libavcodec
Zero 'idx' for each iteration of the coupling gain loop and as it is only used
within this loop, we can move its declaration there too. This fixes bitstream
desync when decoding streams containing CCEs.
Based on a patch by Alex Converse (alex converse gmail com)
author | superdump |
---|---|
date | Thu, 18 Sep 2008 16:02:51 +0000 |
parents | 8313d5901c5c |
children | d33b47d1f4c1 |
rev | line source |
---|---|
3581 | 1 /* |
2 * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> | |
3 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3659
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3659
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3659
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
3581 | 7 * modify it under the terms of the GNU Lesser General Public |
8 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3659
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
3581 | 10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3659
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
3581 | 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3659
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3581 | 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 */ | |
20 | |
6763 | 21 #include "libavcodec/dsputil.h" |
3581 | 22 |
23 #include "gcc_fixes.h" | |
24 | |
25 #include "dsputil_altivec.h" | |
26 | |
27 static void vector_fmul_altivec(float *dst, const float *src, int len) | |
28 { | |
29 int i; | |
30 vector float d0, d1, s, zero = (vector float)vec_splat_u32(0); | |
31 for(i=0; i<len-7; i+=8) { | |
32 d0 = vec_ld(0, dst+i); | |
33 s = vec_ld(0, src+i); | |
34 d1 = vec_ld(16, dst+i); | |
35 d0 = vec_madd(d0, s, zero); | |
36 d1 = vec_madd(d1, vec_ld(16,src+i), zero); | |
37 vec_st(d0, 0, dst+i); | |
38 vec_st(d1, 16, dst+i); | |
39 } | |
40 } | |
41 | |
42 static void vector_fmul_reverse_altivec(float *dst, const float *src0, | |
43 const float *src1, int len) | |
44 { | |
45 int i; | |
46 vector float d, s0, s1, h0, l0, | |
47 s2, s3, zero = (vector float)vec_splat_u32(0); | |
48 src1 += len-4; | |
49 for(i=0; i<len-7; i+=8) { | |
50 s1 = vec_ld(0, src1-i); // [a,b,c,d] | |
51 s0 = vec_ld(0, src0+i); | |
52 l0 = vec_mergel(s1, s1); // [c,c,d,d] | |
53 s3 = vec_ld(-16, src1-i); | |
54 h0 = vec_mergeh(s1, s1); // [a,a,b,b] | |
55 s2 = vec_ld(16, src0+i); | |
56 s1 = vec_mergeh(vec_mergel(l0,h0), // [d,b,d,b] | |
57 vec_mergeh(l0,h0)); // [c,a,c,a] | |
58 // [d,c,b,a] | |
59 l0 = vec_mergel(s3, s3); | |
60 d = vec_madd(s0, s1, zero); | |
61 h0 = vec_mergeh(s3, s3); | |
62 vec_st(d, 0, dst+i); | |
63 s3 = vec_mergeh(vec_mergel(l0,h0), | |
64 vec_mergeh(l0,h0)); | |
65 d = vec_madd(s2, s3, zero); | |
66 vec_st(d, 16, dst+i); | |
67 } | |
68 } | |
69 | |
70 static void vector_fmul_add_add_altivec(float *dst, const float *src0, | |
71 const float *src1, const float *src2, | |
72 int src3, int len, int step) | |
73 { | |
74 int i; | |
75 vector float d, s0, s1, s2, t0, t1, edges; | |
76 vector unsigned char align = vec_lvsr(0,dst), | |
77 mask = vec_lvsl(0, dst); | |
78 | |
79 #if 0 //FIXME: there is still something wrong | |
80 if (step == 2) { | |
81 int y; | |
82 vector float d0, d1, s3, t2; | |
83 vector unsigned int sel = | |
84 vec_mergeh(vec_splat_u32(-1), vec_splat_u32(0)); | |
85 t1 = vec_ld(16, dst); | |
86 for (i=0,y=0; i<len-3; i+=4,y+=8) { | |
87 | |
88 s0 = vec_ld(0,src0+i); | |
89 s1 = vec_ld(0,src1+i); | |
90 s2 = vec_ld(0,src2+i); | |
91 | |
92 // t0 = vec_ld(0, dst+y); //[x x x|a] | |
93 // t1 = vec_ld(16, dst+y); //[b c d|e] | |
94 t2 = vec_ld(31, dst+y); //[f g h|x] | |
95 | |
96 d = vec_madd(s0,s1,s2); // [A B C D] | |
97 | |
98 // [A A B B] | |
99 | |
100 // [C C D D] | |
101 | |
102 d0 = vec_perm(t0, t1, mask); // [a b c d] | |
103 | |
104 d0 = vec_sel(vec_mergeh(d, d), d0, sel); // [A b B d] | |
105 | |
106 edges = vec_perm(t1, t0, mask); | |
107 | |
108 t0 = vec_perm(edges, d0, align); // [x x x|A] | |
109 | |
110 t1 = vec_perm(d0, edges, align); // [b B d|e] | |
111 | |
112 vec_stl(t0, 0, dst+y); | |
113 | |
114 d1 = vec_perm(t1, t2, mask); // [e f g h] | |
115 | |
116 d1 = vec_sel(vec_mergel(d, d), d1, sel); // [C f D h] | |
117 | |
118 edges = vec_perm(t2, t1, mask); | |
119 | |
120 t1 = vec_perm(edges, d1, align); // [b B d|C] | |
121 | |
122 t2 = vec_perm(d1, edges, align); // [f D h|x] | |
123 | |
124 vec_stl(t1, 16, dst+y); | |
125 | |
126 t0 = t1; | |
127 | |
128 vec_stl(t2, 31, dst+y); | |
129 | |
130 t1 = t2; | |
131 } | |
132 } else | |
133 #endif | |
134 if (step == 1 && src3 == 0) | |
135 for (i=0; i<len-3; i+=4) { | |
4387 | 136 t0 = vec_ld(0, dst+i); |
3581 | 137 t1 = vec_ld(15, dst+i); |
138 s0 = vec_ld(0, src0+i); | |
139 s1 = vec_ld(0, src1+i); | |
140 s2 = vec_ld(0, src2+i); | |
141 edges = vec_perm(t1 ,t0, mask); | |
142 d = vec_madd(s0,s1,s2); | |
3659
dd55fb216497
Proper fix for the corner case that would have been corrected before, praise&blame to me and exg in equal shares
lu_zero
parents:
3657
diff
changeset
|
143 t1 = vec_perm(d, edges, align); |
dd55fb216497
Proper fix for the corner case that would have been corrected before, praise&blame to me and exg in equal shares
lu_zero
parents:
3657
diff
changeset
|
144 t0 = vec_perm(edges, d, align); |
dd55fb216497
Proper fix for the corner case that would have been corrected before, praise&blame to me and exg in equal shares
lu_zero
parents:
3657
diff
changeset
|
145 vec_st(t1, 15, dst+i); |
3581 | 146 vec_st(t0, 0, dst+i); |
147 } | |
148 else | |
149 ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step); | |
150 } | |
151 | |
7674 | 152 |
153 static vector signed short | |
154 float_to_int16_one_altivec(const float *src) | |
155 { | |
156 vector float s0 = vec_ld(0, src); | |
157 vector float s1 = vec_ld(16, src); | |
158 vector signed int t0 = vec_cts(s0, 0); | |
159 vector signed int t1 = vec_cts(s1, 0); | |
160 return vec_packs(t0,t1); | |
161 } | |
162 | |
7691
8313d5901c5c
Missing static in float_to_int16_altivec declaration
lu_zero
parents:
7676
diff
changeset
|
163 static void float_to_int16_altivec(int16_t *dst, const float *src, int len) |
3581 | 164 { |
165 int i; | |
166 vector signed short d0, d1, d; | |
167 vector unsigned char align; | |
168 if(((long)dst)&15) //FIXME | |
169 for(i=0; i<len-7; i+=8) { | |
170 d0 = vec_ld(0, dst+i); | |
7674 | 171 d = float_to_int16_one_altivec(src+i); |
3581 | 172 d1 = vec_ld(15, dst+i); |
173 d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i)); | |
174 align = vec_lvsr(0, dst+i); | |
3657
ff6720290478
Fix float_to_int16, unaligned case, broken by the previous commit
lu_zero
parents:
3583
diff
changeset
|
175 d0 = vec_perm(d1, d, align); |
ff6720290478
Fix float_to_int16, unaligned case, broken by the previous commit
lu_zero
parents:
3583
diff
changeset
|
176 d1 = vec_perm(d, d1, align); |
3581 | 177 vec_st(d0, 0, dst+i); |
178 vec_st(d1,15, dst+i); | |
179 } | |
180 else | |
181 for(i=0; i<len-7; i+=8) { | |
7674 | 182 d = float_to_int16_one_altivec(src+i); |
3581 | 183 vec_st(d, 0, dst+i); |
184 } | |
185 } | |
186 | |
7675
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
187 static void |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
188 float_to_int16_interleave_altivec(int16_t *dst, const float **src, |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
189 long len, int channels) |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
190 { |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
191 int i; |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
192 vector signed short d0, d1, d2, c0, c1, t0, t1; |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
193 vector unsigned char align; |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
194 if(channels == 1) |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
195 float_to_int16_altivec(dst, src[0], len); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
196 else |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
197 if (channels == 2) { |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
198 if(((long)dst)&15) |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
199 for(i=0; i<len-7; i+=8) { |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
200 d0 = vec_ld(0, dst + i); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
201 t0 = float_to_int16_one_altivec(src[0] + i); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
202 d1 = vec_ld(31, dst + i); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
203 t1 = float_to_int16_one_altivec(src[1] + i); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
204 c0 = vec_mergeh(t0, t1); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
205 c1 = vec_mergel(t0, t1); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
206 d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i)); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
207 align = vec_lvsr(0, dst + i); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
208 d0 = vec_perm(d2, c0, align); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
209 d1 = vec_perm(c0, c1, align); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
210 vec_st(d0, 0, dst + i); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
211 d0 = vec_perm(c1, d2, align); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
212 vec_st(d1, 15, dst + i); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
213 vec_st(d0, 31, dst + i); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
214 dst+=8; |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
215 } |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
216 else |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
217 for(i=0; i<len-7; i+=8) { |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
218 t0 = float_to_int16_one_altivec(src[0] + i); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
219 t1 = float_to_int16_one_altivec(src[1] + i); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
220 d0 = vec_mergeh(t0, t1); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
221 d1 = vec_mergel(t0, t1); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
222 vec_st(d0, 0, dst + i); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
223 vec_st(d1, 16, dst + i); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
224 dst+=8; |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
225 } |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
226 } else { |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
227 DECLARE_ALIGNED(16, int16_t, tmp[len]); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
228 int c, j; |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
229 for (c = 0; c < channels; c++) { |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
230 float_to_int16_altivec(tmp, src[c], len); |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
231 for (i = 0, j = c; i < len; i++, j+=channels) { |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
232 dst[j] = tmp[i]; |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
233 } |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
234 } |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
235 } |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
236 } |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
237 |
3581 | 238 void float_init_altivec(DSPContext* c, AVCodecContext *avctx) |
239 { | |
240 c->vector_fmul = vector_fmul_altivec; | |
241 c->vector_fmul_reverse = vector_fmul_reverse_altivec; | |
242 c->vector_fmul_add_add = vector_fmul_add_add_altivec; | |
7675
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
243 if(!(avctx->flags & CODEC_FLAG_BITEXACT)) { |
3581 | 244 c->float_to_int16 = float_to_int16_altivec; |
7675
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
245 c->float_to_int16_interleave = float_to_int16_interleave_altivec; |
ad4bf45b9b63
Introduce float_to_int16_interleave_altivec, tested with vorbis
lu_zero
parents:
7674
diff
changeset
|
246 } |
3581 | 247 } |