Mercurial > mplayer.hg
annotate liba52/imdct_3dnow.h @ 29130:33956c5f5005
Reemit the ID_AID_x_LANG for the track. This allows the identification of the
audio track by language code (en or es) rather than by ID (128 or 129).
patch by Kevin DeKorte, kdekorte gmail com
author | diego |
---|---|
date | Sat, 11 Apr 2009 13:51:02 +0000 |
parents | 31287e75b5d8 |
children | e83eef58b30a |
rev | line source |
---|---|
4497 | 1 /* |
25323
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
2 * 3DNOW and 3DNOWEX optimized IMDCT |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
3 * Copyright (C) 2002 Nick Kurshev |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
4 * |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
5 * This file is part of a52dec, a free ATSC A-52 stream decoder. |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
6 * See http://liba52.sourceforge.net/ for updates. |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
7 * |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
8 * a52dec is free software; you can redistribute it and/or modify |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
9 * it under the terms of the GNU General Public License as published by |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
10 * the Free Software Foundation; either version 2 of the License, or |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
11 * (at your option) any later version. |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
12 * |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
13 * a52dec is distributed in the hope that it will be useful, |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
16 * GNU General Public License for more details. |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
17 * |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
18 * You should have received a copy of the GNU General Public License |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
19 * along with this program; if not, write to the Free Software |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
8855a2568281
Relicense as GPL v2 or later like the rest of liba52.
diego
parents:
18720
diff
changeset
|
21 */ |
4497 | 22 |
/*
 * Choose the symbol names used by this expansion of the file.
 *
 * Each FFT_*_3DNOW macro names one routine defined below.  When
 * HAVE_AMD3DNOWEXT is non-zero the names carry the "3dnowex" suffix,
 * otherwise the plain "3dnow" suffix.  The #undef lines drop any
 * definitions left from an earlier expansion (presumably so the file can
 * be included once per instruction-set variant -- confirm in the includer).
 */
#undef FFT_4_3DNOW
#undef FFT_8_3DNOW
#undef FFT_ASMB_3DNOW
#undef FFT_ASMB16_3DNOW
#undef FFT_128P_3DNOW

#if HAVE_AMD3DNOWEXT
#define FFT_4_3DNOW		fft_4_3dnowex
#define FFT_8_3DNOW		fft_8_3dnowex
#define FFT_ASMB_3DNOW		fft_asmb_3dnowex
#define FFT_ASMB16_3DNOW	fft_asmb16_3dnowex
#define FFT_128P_3DNOW		fft_128p_3dnowex
#else
#define FFT_4_3DNOW		fft_4_3dnow
#define FFT_8_3DNOW		fft_8_3dnow
#define FFT_ASMB_3DNOW		fft_asmb_3dnow
#define FFT_ASMB16_3DNOW	fft_asmb16_3dnow
#define FFT_128P_3DNOW		fft_128p_3dnow
#endif
42 | |
43 static void FFT_4_3DNOW(complex_t *x) | |
44 { | |
45 /* delta_p = 1 here */ | |
46 /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} | |
47 */ | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
48 __asm__ volatile( |
4497 | 49 "movq 24(%1), %%mm3\n\t" |
50 "movq 8(%1), %%mm1\n\t" | |
51 "pxor %2, %%mm3\n\t" /* mm3.re | -mm3.im */ | |
52 "pxor %3, %%mm1\n\t" /* -mm1.re | mm1.im */ | |
53 "pfadd %%mm1, %%mm3\n\t" /* vi.im = x[3].re - x[1].re; */ | |
54 "movq %%mm3, %%mm4\n\t" /* vi.re =-x[3].im + x[1].im; mm4 = vi */ | |
28335 | 55 #if HAVE_AMD3DNOWEXT |
4497 | 56 "pswapd %%mm4, %%mm4\n\t" |
57 #else | |
8230
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
58 "punpckldq %%mm4, %%mm5\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
59 "punpckhdq %%mm5, %%mm4\n\t" |
4497 | 60 #endif |
61 "movq (%1), %%mm5\n\t" /* yb.re = x[0].re - x[2].re; */ | |
62 "movq (%1), %%mm6\n\t" /* yt.re = x[0].re + x[2].re; */ | |
63 "movq 24(%1), %%mm7\n\t" /* u.re = x[3].re + x[1].re; */ | |
64 "pfsub 16(%1), %%mm5\n\t" /* yb.im = x[0].im - x[2].im; mm5 = yb */ | |
65 "pfadd 16(%1), %%mm6\n\t" /* yt.im = x[0].im + x[2].im; mm6 = yt */ | |
66 "pfadd 8(%1), %%mm7\n\t" /* u.im = x[3].im + x[1].im; mm7 = u */ | |
67 | |
68 "movq %%mm6, %%mm0\n\t" /* x[0].re = yt.re + u.re; */ | |
69 "movq %%mm5, %%mm1\n\t" /* x[1].re = yb.re + vi.re; */ | |
70 "pfadd %%mm7, %%mm0\n\t" /*x[0].im = yt.im + u.im; */ | |
71 "pfadd %%mm4, %%mm1\n\t" /* x[1].im = yb.im + vi.im; */ | |
72 "movq %%mm0, (%0)\n\t" | |
73 "movq %%mm1, 8(%0)\n\t" | |
74 | |
75 "pfsub %%mm7, %%mm6\n\t" /* x[2].re = yt.re - u.re; */ | |
76 "pfsub %%mm4, %%mm5\n\t" /* x[3].re = yb.re - vi.re; */ | |
77 "movq %%mm6, 16(%0)\n\t" /* x[2].im = yt.im - u.im; */ | |
78 "movq %%mm5, 24(%0)" /* x[3].im = yb.im - vi.im; */ | |
79 :"=r"(x) | |
80 :"0"(x), | |
81 "m"(x_plus_minus_3dnow), | |
82 "m"(x_minus_plus_3dnow) | |
83 :"memory"); | |
84 } | |
85 | |
86 static void FFT_8_3DNOW(complex_t *x) | |
87 { | |
88 /* delta_p = diag{1, sqrt(i)} here */ | |
89 /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8} | |
90 */ | |
91 complex_t wT1, wB1, wB2; | |
92 | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
93 __asm__ volatile( |
4497 | 94 "movq 8(%2), %%mm0\n\t" |
95 "movq 24(%2), %%mm1\n\t" | |
96 "movq %%mm0, %0\n\t" /* wT1 = x[1]; */ | |
97 "movq %%mm1, %1\n\t" /* wB1 = x[3]; */ | |
98 :"=m"(wT1), "=m"(wB1) | |
99 :"r"(x) | |
100 :"memory"); | |
101 | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
102 __asm__ volatile( |
4497 | 103 "movq 16(%0), %%mm2\n\t" |
104 "movq 32(%0), %%mm3\n\t" | |
105 "movq %%mm2, 8(%0)\n\t" /* x[1] = x[2]; */ | |
106 "movq 48(%0), %%mm4\n\t" | |
107 "movq %%mm3, 16(%0)\n\t" /* x[2] = x[4]; */ | |
108 "movq %%mm4, 24(%0)\n\t" /* x[3] = x[6]; */ | |
109 :"=r"(x) | |
110 :"0"(x) | |
111 :"memory"); | |
112 | |
113 fft_4_3dnow(&x[0]); | |
114 | |
115 /* x[0] x[4] x[2] x[6] */ | |
116 | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
117 __asm__ volatile( |
4497 | 118 "movq 40(%1), %%mm0\n\t" |
119 "movq %%mm0, %%mm3\n\t" | |
120 "movq 56(%1), %%mm1\n\t" | |
121 "pfadd %%mm1, %%mm0\n\t" | |
122 "pfsub %%mm1, %%mm3\n\t" | |
123 "movq (%2), %%mm2\n\t" | |
124 "pfadd %%mm2, %%mm0\n\t" | |
125 "pfadd %%mm2, %%mm3\n\t" | |
126 "movq (%3), %%mm1\n\t" | |
127 "pfadd %%mm1, %%mm0\n\t" | |
128 "pfsub %%mm1, %%mm3\n\t" | |
129 "movq (%1), %%mm1\n\t" | |
130 "movq 16(%1), %%mm4\n\t" | |
131 "movq %%mm1, %%mm2\n\t" | |
28335 | 132 #if HAVE_AMD3DNOWEXT |
4497 | 133 "pswapd %%mm3, %%mm3\n\t" |
134 #else | |
8230
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
135 "punpckldq %%mm3, %%mm6\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
136 "punpckhdq %%mm6, %%mm3\n\t" |
4497 | 137 #endif |
138 "pfadd %%mm0, %%mm1\n\t" | |
139 "movq %%mm4, %%mm5\n\t" | |
140 "pfsub %%mm0, %%mm2\n\t" | |
141 "pfadd %%mm3, %%mm4\n\t" | |
142 "movq %%mm1, (%0)\n\t" | |
143 "pfsub %%mm3, %%mm5\n\t" | |
144 "movq %%mm2, 32(%0)\n\t" | |
145 "movd %%mm4, 16(%0)\n\t" | |
146 "movd %%mm5, 48(%0)\n\t" | |
147 "psrlq $32, %%mm4\n\t" | |
148 "psrlq $32, %%mm5\n\t" | |
149 "movd %%mm4, 52(%0)\n\t" | |
150 "movd %%mm5, 20(%0)" | |
151 :"=r"(x) | |
152 :"0"(x), "r"(&wT1), "r"(&wB1) | |
153 :"memory"); | |
154 | |
155 /* x[1] x[5] */ | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
156 __asm__ volatile ( |
4497 | 157 "movq %6, %%mm6\n\t" |
158 "movq %5, %%mm7\n\t" | |
159 "movq %1, %%mm0\n\t" | |
160 "movq %2, %%mm1\n\t" | |
161 "movq 56(%3), %%mm3\n\t" | |
162 "pfsub 40(%3), %%mm0\n\t" | |
28335 | 163 #if HAVE_AMD3DNOWEXT |
4497 | 164 "pswapd %%mm1, %%mm1\n\t" |
165 #else | |
8230
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
166 "punpckldq %%mm1, %%mm2\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
167 "punpckhdq %%mm2, %%mm1\n\t" |
4497 | 168 #endif |
169 "pxor %%mm7, %%mm1\n\t" | |
170 "pfadd %%mm1, %%mm0\n\t" | |
28335 | 171 #if HAVE_AMD3DNOWEXT |
4497 | 172 "pswapd %%mm3, %%mm3\n\t" |
173 #else | |
8230
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
174 "punpckldq %%mm3, %%mm2\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
175 "punpckhdq %%mm2, %%mm3\n\t" |
4497 | 176 #endif |
177 "pxor %%mm6, %%mm3\n\t" | |
178 "pfadd %%mm3, %%mm0\n\t" | |
179 "movq %%mm0, %%mm1\n\t" | |
180 "pxor %%mm6, %%mm1\n\t" | |
181 "pfacc %%mm1, %%mm0\n\t" | |
182 "pfmul %4, %%mm0\n\t" | |
183 | |
184 "movq 40(%3), %%mm5\n\t" | |
28335 | 185 #if HAVE_AMD3DNOWEXT |
4497 | 186 "pswapd %%mm5, %%mm5\n\t" |
187 #else | |
8230
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
188 "punpckldq %%mm5, %%mm1\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
189 "punpckhdq %%mm1, %%mm5\n\t" |
4497 | 190 #endif |
191 "movq %%mm5, %0\n\t" | |
192 | |
193 "movq 8(%3), %%mm1\n\t" | |
194 "movq %%mm1, %%mm2\n\t" | |
195 "pfsub %%mm0, %%mm1\n\t" | |
196 "pfadd %%mm0, %%mm2\n\t" | |
197 "movq %%mm1, 40(%3)\n\t" | |
198 "movq %%mm2, 8(%3)\n\t" | |
199 :"=m"(wB2) | |
200 :"m"(wT1), "m"(wB1), "r"(x), "m"(HSQRT2_3DNOW), | |
201 "m"(x_plus_minus_3dnow), "m"(x_minus_plus_3dnow) | |
202 :"memory"); | |
203 | |
204 | |
205 /* x[3] x[7] */ | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
206 __asm__ volatile( |
4497 | 207 "movq %1, %%mm0\n\t" |
28335 | 208 #if HAVE_AMD3DNOWEXT |
4497 | 209 "pswapd %3, %%mm1\n\t" |
210 #else | |
211 "movq %3, %%mm1\n\t" | |
8230
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
212 "punpckldq %%mm1, %%mm2\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
213 "punpckhdq %%mm2, %%mm1\n\t" |
4497 | 214 #endif |
215 "pxor %%mm6, %%mm1\n\t" | |
216 "pfadd %%mm1, %%mm0\n\t" | |
217 "movq %2, %%mm2\n\t" | |
218 "movq 56(%4), %%mm3\n\t" | |
219 "pxor %%mm7, %%mm3\n\t" | |
220 "pfadd %%mm3, %%mm2\n\t" | |
28335 | 221 #if HAVE_AMD3DNOWEXT |
4497 | 222 "pswapd %%mm2, %%mm2\n\t" |
223 #else | |
8230
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
224 "punpckldq %%mm2, %%mm5\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
225 "punpckhdq %%mm5, %%mm2\n\t" |
4497 | 226 #endif |
227 "movq 24(%4), %%mm3\n\t" | |
228 "pfsub %%mm2, %%mm0\n\t" | |
229 "movq %%mm3, %%mm4\n\t" | |
230 "movq %%mm0, %%mm1\n\t" | |
231 "pxor %%mm6, %%mm0\n\t" | |
232 "pfacc %%mm1, %%mm0\n\t" | |
233 "pfmul %5, %%mm0\n\t" | |
234 "movq %%mm0, %%mm1\n\t" | |
235 "pxor %%mm6, %%mm1\n\t" | |
236 "pxor %%mm7, %%mm0\n\t" | |
237 "pfadd %%mm1, %%mm3\n\t" | |
238 "pfadd %%mm0, %%mm4\n\t" | |
239 "movq %%mm4, 24(%0)\n\t" | |
240 "movq %%mm3, 56(%0)\n\t" | |
241 :"=r"(x) | |
242 :"m"(wT1), "m"(wB2), "m"(wB1), "0"(x), "m"(HSQRT2_3DNOW) | |
243 :"memory"); | |
244 } | |
245 | |
246 static void FFT_ASMB_3DNOW(int k, complex_t *x, complex_t *wTB, | |
247 const complex_t *d, const complex_t *d_3) | |
248 { | |
249 register complex_t *x2k, *x3k, *x4k, *wB; | |
250 | |
251 TRANS_FILL_MM6_MM7_3DNOW(); | |
252 x2k = x + 2 * k; | |
253 x3k = x2k + 2 * k; | |
254 x4k = x3k + 2 * k; | |
255 wB = wTB + 2 * k; | |
256 | |
257 TRANSZERO_3DNOW(x[0],x2k[0],x3k[0],x4k[0]); | |
258 TRANS_3DNOW(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]); | |
259 | |
260 --k; | |
261 for(;;) { | |
262 TRANS_3DNOW(x[2],x2k[2],x3k[2],x4k[2],wTB[2],wB[2],d[2],d_3[2]); | |
263 TRANS_3DNOW(x[3],x2k[3],x3k[3],x4k[3],wTB[3],wB[3],d[3],d_3[3]); | |
264 if (!--k) break; | |
265 x += 2; | |
266 x2k += 2; | |
267 x3k += 2; | |
268 x4k += 2; | |
269 d += 2; | |
270 d_3 += 2; | |
271 wTB += 2; | |
272 wB += 2; | |
273 } | |
274 | |
275 } | |
276 | |
277 void FFT_ASMB16_3DNOW(complex_t *x, complex_t *wTB) | |
278 { | |
279 int k = 2; | |
280 | |
281 TRANS_FILL_MM6_MM7_3DNOW(); | |
282 /* transform x[0], x[8], x[4], x[12] */ | |
283 TRANSZERO_3DNOW(x[0],x[4],x[8],x[12]); | |
284 | |
285 /* transform x[1], x[9], x[5], x[13] */ | |
286 TRANS_3DNOW(x[1],x[5],x[9],x[13],wTB[1],wTB[5],delta16[1],delta16_3[1]); | |
287 | |
288 /* transform x[2], x[10], x[6], x[14] */ | |
289 TRANSHALF_16_3DNOW(x[2],x[6],x[10],x[14]); | |
290 | |
291 /* transform x[3], x[11], x[7], x[15] */ | |
292 TRANS_3DNOW(x[3],x[7],x[11],x[15],wTB[3],wTB[7],delta16[3],delta16_3[3]); | |
293 | |
294 } | |
295 | |
296 static void FFT_128P_3DNOW(complex_t *a) | |
297 { | |
298 FFT_8_3DNOW(&a[0]); FFT_4_3DNOW(&a[8]); FFT_4_3DNOW(&a[12]); | |
299 FFT_ASMB16_3DNOW(&a[0], &a[8]); | |
300 | |
301 FFT_8_3DNOW(&a[16]), FFT_8_3DNOW(&a[24]); | |
302 FFT_ASMB_3DNOW(4, &a[0], &a[16],&delta32[0], &delta32_3[0]); | |
303 | |
304 FFT_8_3DNOW(&a[32]); FFT_4_3DNOW(&a[40]); FFT_4_3DNOW(&a[44]); | |
305 FFT_ASMB16_3DNOW(&a[32], &a[40]); | |
306 | |
307 FFT_8_3DNOW(&a[48]); FFT_4_3DNOW(&a[56]); FFT_4_3DNOW(&a[60]); | |
308 FFT_ASMB16_3DNOW(&a[48], &a[56]); | |
309 | |
310 FFT_ASMB_3DNOW(8, &a[0], &a[32],&delta64[0], &delta64_3[0]); | |
311 | |
312 FFT_8_3DNOW(&a[64]); FFT_4_3DNOW(&a[72]); FFT_4_3DNOW(&a[76]); | |
313 /* FFT_16(&a[64]); */ | |
314 FFT_ASMB16_3DNOW(&a[64], &a[72]); | |
315 | |
316 FFT_8_3DNOW(&a[80]); FFT_8_3DNOW(&a[88]); | |
317 | |
318 /* FFT_32(&a[64]); */ | |
319 FFT_ASMB_3DNOW(4, &a[64], &a[80],&delta32[0], &delta32_3[0]); | |
320 | |
321 FFT_8_3DNOW(&a[96]); FFT_4_3DNOW(&a[104]), FFT_4_3DNOW(&a[108]); | |
322 /* FFT_16(&a[96]); */ | |
323 FFT_ASMB16_3DNOW(&a[96], &a[104]); | |
324 | |
325 FFT_8_3DNOW(&a[112]), FFT_8_3DNOW(&a[120]); | |
326 /* FFT_32(&a[96]); */ | |
327 FFT_ASMB_3DNOW(4, &a[96], &a[112], &delta32[0], &delta32_3[0]); | |
328 | |
329 /* FFT_128(&a[0]); */ | |
330 FFT_ASMB_3DNOW(16, &a[0], &a[64], &delta128[0], &delta128_3[0]); | |
331 } | |
332 | |
333 static void | |
28335 | 334 #if HAVE_AMD3DNOWEXT |
4497 | 335 imdct_do_512_3dnowex |
336 #else | |
337 imdct_do_512_3dnow | |
338 #endif | |
339 (sample_t data[],sample_t delay[], sample_t bias) | |
340 { | |
8254
772d6d27fd66
warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents:
8230
diff
changeset
|
341 int i; |
772d6d27fd66
warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents:
8230
diff
changeset
|
342 /* int k; |
4497 | 343 int p,q; |
344 int m; | |
345 int two_m; | |
346 int two_m_plus_one; | |
347 | |
348 sample_t tmp_a_i; | |
349 sample_t tmp_a_r; | |
350 sample_t tmp_b_i; | |
8254
772d6d27fd66
warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents:
8230
diff
changeset
|
351 sample_t tmp_b_r;*/ |
4497 | 352 |
353 sample_t *data_ptr; | |
354 sample_t *delay_ptr; | |
355 sample_t *window_ptr; | |
356 | |
357 /* 512 IMDCT with source and dest data in 'data' */ | |
358 | |
359 /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ | |
360 #if 1 | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
361 __asm__ volatile ( |
4497 | 362 "movq %0, %%mm7\n\t" |
363 ::"m"(x_plus_minus_3dnow) | |
364 :"memory"); | |
365 for( i=0; i < 128; i++) { | |
366 int j = pm128[i]; | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
367 __asm__ volatile ( |
4497 | 368 "movd %1, %%mm0\n\t" |
369 "movd %3, %%mm1\n\t" | |
370 "punpckldq %2, %%mm0\n\t" /* mm0 = data[256-2*j-1] | data[2*j]*/ | |
371 "punpckldq %4, %%mm1\n\t" /* mm1 = xcos[j] | xsin[j] */ | |
372 "movq %%mm0, %%mm2\n\t" | |
373 "pfmul %%mm1, %%mm0\n\t" | |
28335 | 374 #if HAVE_AMD3DNOWEXT |
4497 | 375 "pswapd %%mm1, %%mm1\n\t" |
376 #else | |
8230
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
377 "punpckldq %%mm1, %%mm5\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
378 "punpckhdq %%mm5, %%mm1\n\t" |
4497 | 379 #endif |
380 "pfmul %%mm1, %%mm2\n\t" | |
28335 | 381 #if HAVE_AMD3DNOWEXT |
4497 | 382 "pfpnacc %%mm2, %%mm0\n\t" |
383 #else | |
384 "pxor %%mm7, %%mm0\n\t" | |
385 "pfacc %%mm2, %%mm0\n\t" | |
386 #endif | |
387 "pxor %%mm7, %%mm0\n\t" | |
388 "movq %%mm0, %0" | |
389 :"=m"(buf[i]) | |
390 :"m"(data[256-2*j-1]), "m"(data[2*j]), "m"(xcos1[j]), "m"(xsin1[j]) | |
391 :"memory" | |
392 ); | |
393 /* buf[i].re = (data[256-2*j-1] * xcos1[j] - data[2*j] * xsin1[j]); | |
394 buf[i].im = (data[256-2*j-1] * xsin1[j] + data[2*j] * xcos1[j])*(-1.0);*/ | |
395 } | |
396 #else | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
397 __asm__ volatile ("femms":::"memory"); |
4497 | 398 for( i=0; i < 128; i++) { |
399 /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ | |
400 int j= pm128[i]; | |
401 buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); | |
402 buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); | |
403 } | |
404 #endif | |
405 | |
406 /* FFT Merge */ | |
407 /* unoptimized variant | |
408 for (m=1; m < 7; m++) { | |
409 if(m) | |
410 two_m = (1 << m); | |
411 else | |
412 two_m = 1; | |
413 | |
414 two_m_plus_one = (1 << (m+1)); | |
415 | |
416 for(i = 0; i < 128; i += two_m_plus_one) { | |
417 for(k = 0; k < two_m; k++) { | |
418 p = k + i; | |
419 q = p + two_m; | |
420 tmp_a_r = buf[p].real; | |
421 tmp_a_i = buf[p].imag; | |
422 tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag; | |
423 tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag; | |
424 buf[p].real = tmp_a_r + tmp_b_r; | |
425 buf[p].imag = tmp_a_i + tmp_b_i; | |
426 buf[q].real = tmp_a_r - tmp_b_r; | |
427 buf[q].imag = tmp_a_i - tmp_b_i; | |
428 } | |
429 } | |
430 } | |
431 */ | |
432 | |
433 FFT_128P_3DNOW (&buf[0]); | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25323
diff
changeset
|
434 // __asm__ volatile ("femms \n\t":::"memory"); |
4497 | 435 |
436 /* Post IFFT complex multiply plus IFFT complex conjugate*/ | |
437 #if 1 | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
438 __asm__ volatile ( |
4497 | 439 "movq %0, %%mm7\n\t" |
440 "movq %1, %%mm6\n\t" | |
441 ::"m"(x_plus_minus_3dnow), | |
442 "m"(x_minus_plus_3dnow) | |
443 :"eax","memory"); | |
444 for (i=0; i < 128; i++) { | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
445 __asm__ volatile ( |
4497 | 446 "movq %1, %%mm0\n\t" /* ac3_buf[i].re | ac3_buf[i].im */ |
447 "movq %%mm0, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */ | |
28335 | 448 #if !HAVE_AMD3DNOWEXT |
8230
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
449 "punpckldq %%mm1, %%mm2\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
450 "punpckhdq %%mm2, %%mm1\n\t" |
4497 | 451 #else |
452 "pswapd %%mm1, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */ | |
453 #endif | |
454 "movd %3, %%mm3\n\t" /* ac3_xsin[i] */ | |
455 "punpckldq %2, %%mm3\n\t" /* ac3_xsin[i] | ac3_xcos[i] */ | |
456 "pfmul %%mm3, %%mm0\n\t" | |
457 "pfmul %%mm3, %%mm1\n\t" | |
28335 | 458 #if !HAVE_AMD3DNOWEXT |
4497 | 459 "pxor %%mm7, %%mm0\n\t" |
460 "pfacc %%mm1, %%mm0\n\t" | |
8230
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
461 "punpckldq %%mm0, %%mm1\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
462 "punpckhdq %%mm1, %%mm0\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
463 "movq %%mm0, %0\n\t" |
4497 | 464 #else |
465 "pfpnacc %%mm1, %%mm0\n\t" /* mm0 = mm0[0] - mm0[1] | mm1[0] + mm1[1] */ | |
466 "pswapd %%mm0, %%mm0\n\t" | |
467 "movq %%mm0, %0" | |
468 #endif | |
469 :"=m"(buf[i]) | |
470 :"m"(buf[i]),"m"(xcos1[i]),"m"(xsin1[i]) | |
471 :"memory"); | |
472 /* ac3_buf[i].re =(tmp_a_r * ac3_xcos1[i]) + (tmp_a_i * ac3_xsin1[i]); | |
473 ac3_buf[i].im =(tmp_a_r * ac3_xsin1[i]) - (tmp_a_i * ac3_xcos1[i]);*/ | |
474 } | |
475 #else | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
476 __asm__ volatile ("femms":::"memory"); |
4497 | 477 for( i=0; i < 128; i++) { |
478 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ | |
479 tmp_a_r = buf[i].real; | |
480 tmp_a_i = -1.0 * buf[i].imag; | |
481 buf[i].real =(tmp_a_r * xcos1[i]) - (tmp_a_i * xsin1[i]); | |
482 buf[i].imag =(tmp_a_r * xsin1[i]) + (tmp_a_i * xcos1[i]); | |
483 } | |
484 #endif | |
485 | |
486 data_ptr = data; | |
487 delay_ptr = delay; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
8254
diff
changeset
|
488 window_ptr = a52_imdct_window; |
4497 | 489 |
490 /* Window and convert to real valued signal */ | |
491 #if 1 | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25323
diff
changeset
|
492 __asm__ volatile ( |
4497 | 493 "movd (%0), %%mm3 \n\t" |
494 "punpckldq %%mm3, %%mm3 \n\t" | |
495 :: "r" (&bias) | |
496 ); | |
497 for (i=0; i< 64; i++) { | |
498 /* merge two loops in one to enable working of 2 decoders */ | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
499 __asm__ volatile ( |
4497 | 500 "movd 516(%1), %%mm0\n\t" |
501 "movd (%1), %%mm1\n\t" /**data_ptr++=-buf[64+i].im**window_ptr+++*delay_ptr++;*/ | |
502 "punpckldq (%2), %%mm0\n\t"/*data_ptr[128]=-buf[i].re*window_ptr[128]+delay_ptr[128];*/ | |
503 "punpckldq 516(%2), %%mm1\n\t" | |
504 "pfmul (%3), %%mm0\n\t"/**data_ptr++=buf[64-i-1].re**window_ptr+++*delay_ptr++;*/ | |
505 "pfmul 512(%3), %%mm1\n\t" | |
506 "pxor %%mm6, %%mm0\n\t"/*data_ptr[128]=buf[128-i-1].im*window_ptr[128]+delay_ptr[128];*/ | |
507 "pxor %%mm6, %%mm1\n\t" | |
508 "pfadd (%4), %%mm0\n\t" | |
509 "pfadd 512(%4), %%mm1\n\t" | |
510 "pfadd %%mm3, %%mm0\n\t" | |
511 "pfadd %%mm3, %%mm1\n\t" | |
512 "movq %%mm0, (%0)\n\t" | |
513 "movq %%mm1, 512(%0)" | |
514 :"=r"(data_ptr) | |
515 :"r"(&buf[i].real), "r"(&buf[64-i-1].real), "r"(window_ptr), "r"(delay_ptr), "0"(data_ptr) | |
516 :"memory"); | |
517 data_ptr += 2; | |
518 window_ptr += 2; | |
519 delay_ptr += 2; | |
520 } | |
521 window_ptr += 128; | |
522 #else | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
523 __asm__ volatile ("femms":::"memory"); |
4497 | 524 for(i=0; i< 64; i++) { |
525 *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; | |
526 *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; | |
527 } | |
528 | |
529 for(i=0; i< 64; i++) { | |
530 *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; | |
531 *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; | |
532 } | |
533 #endif | |
534 | |
535 /* The trailing edge of the window goes into the delay line */ | |
536 delay_ptr = delay; | |
537 #if 1 | |
538 for(i=0; i< 64; i++) { | |
539 /* merge two loops in one to enable working of 2 decoders */ | |
540 window_ptr -=2; | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
541 __asm__ volatile( |
4497 | 542 "movd 508(%1), %%mm0\n\t" |
543 "movd (%1), %%mm1\n\t" | |
544 "punpckldq (%2), %%mm0\n\t" | |
545 "punpckldq 508(%2), %%mm1\n\t" | |
28335 | 546 #if HAVE_AMD3DNOWEXT |
4497 | 547 "pswapd (%3), %%mm3\n\t" |
548 "pswapd -512(%3), %%mm4\n\t" | |
549 #else | |
8230
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
550 "movq (%3), %%mm3\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
551 "punpckldq %%mm3, %%mm2\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
552 "punpckhdq %%mm2, %%mm3\n\t" |
4497 | 553 "movq -512(%3), %%mm4\n\t" |
8230
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
554 "punpckldq %%mm4, %%mm2\n\t" |
330086b89d8f
minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents:
4497
diff
changeset
|
555 "punpckhdq %%mm2, %%mm4\n\t" |
4497 | 556 #endif |
557 "pfmul %%mm3, %%mm0\n\t" | |
558 "pfmul %%mm4, %%mm1\n\t" | |
559 "pxor %%mm6, %%mm0\n\t" | |
560 "pxor %%mm7, %%mm1\n\t" | |
561 "movq %%mm0, (%0)\n\t" | |
562 "movq %%mm1, 512(%0)" | |
563 :"=r"(delay_ptr) | |
564 :"r"(&buf[i].imag), "r"(&buf[64-i-1].imag), "r"(window_ptr), "0"(delay_ptr) | |
565 :"memory"); | |
566 delay_ptr += 2; | |
567 } | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
568 __asm__ volatile ("femms":::"memory"); |
4497 | 569 #else |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
570 __asm__ volatile ("femms":::"memory"); |
4497 | 571 for(i=0; i< 64; i++) { |
572 *delay_ptr++ = -buf[64+i].real * *--window_ptr; | |
573 *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; | |
574 } | |
575 | |
576 for(i=0; i<64; i++) { | |
577 *delay_ptr++ = buf[i].imag * *--window_ptr; | |
578 *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; | |
579 } | |
580 #endif | |
581 } |