Mercurial > mplayer.hg
annotate liba52/resample_c.c @ 3722:1bd9efb54a62
*** empty log message ***
author | eyck |
---|---|
date | Mon, 24 Dec 2001 23:54:27 +0000 |
parents | 33c3cff374a1 |
children | 6312aa265429 |
rev | line source |
---|---|
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
1 // this code comes from a52dec/libao/audio_out_oss.c |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
2 |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
3 // FIXME FIXME FIXME |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
4 |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
5 // a52_resample_init should find the requested converter (from type flags -> |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
6 // given number of channels) and set up some function pointers... |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
7 |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
8 // a52_resample() should do the conversion. |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
9 |
3569 | 10 // MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL) |
11 | |
12 /* optimization TODO / NOTES | |
13 movntq is slightly faster (0.5% with the current test.c benchmark) | |
14 (but that's just test.c, so it needs to be tested in reality) | |
15 and it would mean (C / MMX2 / MMX / 3DNOW) versions | |
16 */ | |
17 | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
18 #include <inttypes.h> |
3626 | 19 #include <stdio.h> |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
20 #include "a52.h" |
3567 | 21 #include "../config.h" |
3626 | 22 #include "../cpudetect.h" |
3567 | 23 |
/*
 * Currently selected resampler implementation.  It is NULL at load time and
 * is assigned by a52_resample_init().
 */
int (* a52_resample) (float * _f, int16_t * s16)=NULL;

#ifdef ARCH_X86
/*
 * 64-bit constants used by the MMX code.  They are referenced only by symbol
 * name from inside inline assembly, which the compiler cannot see, so they
 * are marked "used" to guarantee they are emitted.
 *
 * magicF2W : 0x43c00000 in both dwords; subtracted from the raw sample bits.
 * wm1010   : keeps 16-bit words 1 and 3 of a quadword.
 * wm0101   : keeps 16-bit words 0 and 2 of a quadword.
 * wm1100   : keeps the upper two 16-bit words of a quadword.
 */
static uint64_t __attribute__((aligned(8), used)) magicF2W= 0x43c0000043c00000ULL;
static uint64_t __attribute__((aligned(8), used)) wm1010= 0xFFFF0000FFFF0000ULL;
static uint64_t __attribute__((aligned(8), used)) wm0101= 0x0000FFFF0000FFFFULL;
static uint64_t __attribute__((aligned(8), used)) wm1100= 0xFFFFFFFF00000000ULL;
#endif
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
32 |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
/*
 * Turn the raw 32-bit pattern of one sample into a signed 16-bit value.
 *
 * 0x43c00000 is the IEEE-754 single-precision encoding of 384.0f.  For
 * floats in [256, 512) one unit in the last place is 2^-15, so
 * (bits - 0x43c00000) equals (value - 384) * 32768.  Presumably the decoder
 * is configured to emit samples biased by 384 -- confirm against the caller.
 *
 * i  raw bit pattern of the sample, as a signed 32-bit integer
 *
 * Returns i minus the bias, clamped to [-32768, 32767].
 */
static inline int16_t convert (int32_t i)
{
    const int32_t bias  = 0x43c00000;
    const int32_t upper = bias + 32767;   /* 0x43c07fff */
    const int32_t lower = bias - 32768;   /* 0x43bf8000 */

    if (i > upper)
        return 32767;
    if (i < lower)
        return -32768;
    return (int16_t) (i - bias);
}
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
42 |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
43 static int chans=2; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
44 static int flags=0; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
45 |
3626 | 46 int a52_resample_C(float * _f, int16_t * s16) |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
47 { |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
48 int i; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
49 int32_t * f = (int32_t *) _f; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
50 |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
51 switch (flags) { |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
52 case A52_MONO: |
3626 | 53 for (i = 0; i < 256; i++) { |
54 s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0; | |
55 s16[5*i+4] = convert (f[i]); | |
56 } | |
57 break; | |
58 case A52_CHANNEL: | |
59 case A52_STEREO: | |
60 case A52_DOLBY: | |
61 for (i = 0; i < 256; i++) { | |
62 s16[2*i] = convert (f[i]); | |
63 s16[2*i+1] = convert (f[i+256]); | |
64 } | |
65 break; | |
66 case A52_3F: | |
67 for (i = 0; i < 256; i++) { | |
68 s16[5*i] = convert (f[i]); | |
69 s16[5*i+1] = convert (f[i+512]); | |
70 s16[5*i+2] = s16[5*i+3] = 0; | |
71 s16[5*i+4] = convert (f[i+256]); | |
72 } | |
73 break; | |
74 case A52_2F2R: | |
75 for (i = 0; i < 256; i++) { | |
76 s16[4*i] = convert (f[i]); | |
77 s16[4*i+1] = convert (f[i+256]); | |
78 s16[4*i+2] = convert (f[i+512]); | |
79 s16[4*i+3] = convert (f[i+768]); | |
80 } | |
81 break; | |
82 case A52_3F2R: | |
83 for (i = 0; i < 256; i++) { | |
84 s16[5*i] = convert (f[i]); | |
85 s16[5*i+1] = convert (f[i+512]); | |
86 s16[5*i+2] = convert (f[i+768]); | |
87 s16[5*i+3] = convert (f[i+1024]); | |
88 s16[5*i+4] = convert (f[i+256]); | |
89 } | |
90 break; | |
91 case A52_MONO | A52_LFE: | |
92 for (i = 0; i < 256; i++) { | |
93 s16[6*i] = s16[6*i+1] = s16[6*i+2] = s16[6*i+3] = 0; | |
94 s16[6*i+4] = convert (f[i+256]); | |
95 s16[6*i+5] = convert (f[i]); | |
96 } | |
97 break; | |
98 case A52_CHANNEL | A52_LFE: | |
99 case A52_STEREO | A52_LFE: | |
100 case A52_DOLBY | A52_LFE: | |
101 for (i = 0; i < 256; i++) { | |
102 s16[6*i] = convert (f[i+256]); | |
103 s16[6*i+1] = convert (f[i+512]); | |
104 s16[6*i+2] = s16[6*i+3] = s16[6*i+4] = 0; | |
105 s16[6*i+5] = convert (f[i]); | |
106 } | |
107 break; | |
108 case A52_3F | A52_LFE: | |
109 for (i = 0; i < 256; i++) { | |
110 s16[6*i] = convert (f[i+256]); | |
111 s16[6*i+1] = convert (f[i+768]); | |
112 s16[6*i+2] = s16[6*i+3] = 0; | |
113 s16[6*i+4] = convert (f[i+512]); | |
114 s16[6*i+5] = convert (f[i]); | |
115 } | |
116 break; | |
117 case A52_2F2R | A52_LFE: | |
118 for (i = 0; i < 256; i++) { | |
119 s16[6*i] = convert (f[i+256]); | |
120 s16[6*i+1] = convert (f[i+512]); | |
121 s16[6*i+2] = convert (f[i+768]); | |
122 s16[6*i+3] = convert (f[i+1024]); | |
123 s16[6*i+4] = 0; | |
124 s16[6*i+5] = convert (f[i]); | |
125 } | |
126 break; | |
127 case A52_3F2R | A52_LFE: | |
128 for (i = 0; i < 256; i++) { | |
129 s16[6*i] = convert (f[i+256]); | |
130 s16[6*i+1] = convert (f[i+768]); | |
131 s16[6*i+2] = convert (f[i+1024]); | |
132 s16[6*i+3] = convert (f[i+1280]); | |
133 s16[6*i+4] = convert (f[i+512]); | |
134 s16[6*i+5] = convert (f[i]); | |
135 } | |
136 break; | |
137 } | |
138 return chans*256; | |
139 } | |
140 | |
141 #ifdef ARCH_X86 | |
/*
 * MMX variant of the resampler.  It switches over the file-level `flags`
 * with the same case labels as a52_resample_C and, like it, returns
 * chans*256; each layout's conversion loop is written as inline assembly.
 *
 * Pattern shared by the asm blocks below:
 *  - operand %0 is s16 advanced to the end of the region that block writes
 *    (s16+512 / +1024 / +1280 / +1536) and operand %1 is f+256; esi starts
 *    negative and is stepped up until it reaches zero (jnz 1b), so all
 *    addressing is relative to those end pointers.
 *  - mm7 is loaded with magicF2W; psubd subtracts it from the raw 32-bit
 *    sample words and packssdw narrows the result to 16 bits with signed
 *    saturation.
 *  - emms is executed at the end of every block.
 *
 * The constants are referenced by symbol name (magicF2W, wm0101, ...) and
 * the index registers are hard-coded as esi/edi.
 * NOTE(review): this looks 32-bit-x86 only -- confirm ARCH_X86 is never set
 * for x86-64 builds.
 *
 * The local `i` is declared but not used.  There is no default case: for a
 * `flags` value with no matching label nothing is written to s16.
 *
 * _f   input samples, accessed as raw 32-bit words
 * s16  output buffer for the interleaved 16-bit samples
 */
142 int a52_resample_MMX(float * _f, int16_t * s16) | |
143 { | |
144 int i; | |
145 int32_t * f = (int32_t *) _f; | |
146 | |
147 switch (flags) { | |
148 case A52_MONO: | |
3574 | 149 asm volatile( |
150 "movl $-512, %%esi \n\t" | |
151 "movq magicF2W, %%mm7 \n\t" | |
152 "movq wm1100, %%mm3 \n\t" | |
153 "movq wm0101, %%mm4 \n\t" | |
154 "movq wm1010, %%mm5 \n\t" | |
155 "pxor %%mm6, %%mm6 \n\t" | |
156 "1: \n\t" | |
157 "movq (%1, %%esi, 2), %%mm0 \n\t" | |
158 "movq 8(%1, %%esi, 2), %%mm1 \n\t" | |
159 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
160 "psubd %%mm7, %%mm0 \n\t" | |
161 "psubd %%mm7, %%mm1 \n\t" | |
162 "packssdw %%mm1, %%mm0 \n\t" | |
163 "movq %%mm0, %%mm1 \n\t" | |
164 "pand %%mm4, %%mm0 \n\t" | |
165 "pand %%mm5, %%mm1 \n\t" | |
166 "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0 | |
167 "movd %%mm0, 8(%0, %%edi) \n\t" // A 0 | |
168 "pand %%mm3, %%mm0 \n\t" | |
169 "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0 | |
170 "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B | |
171 "pand %%mm3, %%mm1 \n\t" | |
172 "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0 | |
173 "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0 | |
174 "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 D | |
175 "addl $8, %%esi \n\t" | |
176 " jnz 1b \n\t" | |
177 "emms \n\t" | |
178 :: "r" (s16+1280), "r" (f+256) | |
179 :"%esi", "%edi", "memory" | |
180 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
181 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
182 case A52_CHANNEL: |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
183 case A52_STEREO: |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
184 case A52_DOLBY: |
3567 | 185 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it |
186 #ifdef HAVE_SSE | |
187 asm volatile( | |
188 "movl $-1024, %%esi \n\t" | |
189 "1: \n\t" | |
190 "cvtps2pi (%1, %%esi), %%mm0 \n\t" | |
191 "cvtps2pi 1024(%1, %%esi), %%mm2\n\t" | |
192 "movq %%mm0, %%mm1 \n\t" | |
193 "punpcklwd %%mm2, %%mm0 \n\t" | |
194 "punpckhwd %%mm2, %%mm1 \n\t" | |
195 "movq %%mm0, (%0, %%esi) \n\t" | |
196 "movq %%mm1, 8(%0, %%esi) \n\t" | |
197 "addl $16, %%esi \n\t" | |
198 " jnz 1b \n\t" | |
199 "emms \n\t" | |
200 :: "r" (s16+512), "r" (f+256) | |
201 :"%esi", "memory" | |
202 );*/ | |
203 asm volatile( | |
204 "movl $-1024, %%esi \n\t" | |
205 "movq magicF2W, %%mm7 \n\t" | |
206 "1: \n\t" | |
207 "movq (%1, %%esi), %%mm0 \n\t" | |
208 "movq 8(%1, %%esi), %%mm1 \n\t" | |
209 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
210 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
211 "psubd %%mm7, %%mm0 \n\t" | |
212 "psubd %%mm7, %%mm1 \n\t" | |
213 "psubd %%mm7, %%mm2 \n\t" | |
214 "psubd %%mm7, %%mm3 \n\t" | |
215 "packssdw %%mm1, %%mm0 \n\t" | |
216 "packssdw %%mm3, %%mm2 \n\t" | |
217 "movq %%mm0, %%mm1 \n\t" | |
218 "punpcklwd %%mm2, %%mm0 \n\t" | |
219 "punpckhwd %%mm2, %%mm1 \n\t" | |
220 "movq %%mm0, (%0, %%esi) \n\t" | |
221 "movq %%mm1, 8(%0, %%esi) \n\t" | |
222 "addl $16, %%esi \n\t" | |
223 " jnz 1b \n\t" | |
224 "emms \n\t" | |
225 :: "r" (s16+512), "r" (f+256) | |
226 :"%esi", "memory" | |
227 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
228 break; |
3654 | 229 case A52_3F: |
230 asm volatile( | |
231 "movl $-1024, %%esi \n\t" | |
232 "movq magicF2W, %%mm7 \n\t" | |
233 "pxor %%mm6, %%mm6 \n\t" | |
234 "movq %%mm7, %%mm5 \n\t" | |
235 "punpckldq %%mm6, %%mm5 \n\t" | |
236 "1: \n\t" | |
237 "movd (%1, %%esi), %%mm0 \n\t" | |
238 "punpckldq 2048(%1, %%esi), %%mm0\n\t" | |
239 "movd 1024(%1, %%esi), %%mm1 \n\t" | |
240 "punpckldq 4(%1, %%esi), %%mm1 \n\t" | |
241 "movd 2052(%1, %%esi), %%mm2 \n\t" | |
242 "movq %%mm7, %%mm3 \n\t" | |
243 "punpckldq 1028(%1, %%esi), %%mm3\n\t" | |
244 "movd 8(%1, %%esi), %%mm4 \n\t" | |
245 "punpckldq 2056(%1, %%esi), %%mm4\n\t" | |
246 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
247 "sarl $1, %%edi \n\t" | |
248 "psubd %%mm7, %%mm0 \n\t" | |
249 "psubd %%mm7, %%mm1 \n\t" | |
250 "psubd %%mm5, %%mm2 \n\t" | |
251 "psubd %%mm7, %%mm3 \n\t" | |
252 "psubd %%mm7, %%mm4 \n\t" | |
253 "packssdw %%mm6, %%mm0 \n\t" | |
254 "packssdw %%mm2, %%mm1 \n\t" | |
255 "packssdw %%mm4, %%mm3 \n\t" | |
256 "movq %%mm0, (%0, %%edi) \n\t" | |
257 "movq %%mm1, 8(%0, %%edi) \n\t" | |
258 "movq %%mm3, 16(%0, %%edi) \n\t" | |
259 | |
260 "movd 1032(%1, %%esi), %%mm1 \n\t" | |
261 "punpckldq 12(%1, %%esi), %%mm1\n\t" | |
262 "movd 2060(%1, %%esi), %%mm2 \n\t" | |
263 "movq %%mm7, %%mm3 \n\t" | |
264 "punpckldq 1036(%1, %%esi), %%mm3\n\t" | |
265 "pxor %%mm0, %%mm0 \n\t" | |
266 "psubd %%mm7, %%mm1 \n\t" | |
267 "psubd %%mm5, %%mm2 \n\t" | |
268 "psubd %%mm7, %%mm3 \n\t" | |
269 "packssdw %%mm1, %%mm0 \n\t" | |
270 "packssdw %%mm3, %%mm2 \n\t" | |
271 "movq %%mm0, 24(%0, %%edi) \n\t" | |
272 "movq %%mm2, 32(%0, %%edi) \n\t" | |
273 | |
274 "addl $16, %%esi \n\t" | |
275 " jnz 1b \n\t" | |
276 "emms \n\t" | |
277 :: "r" (s16+1280), "r" (f+256) | |
278 :"%esi", "%edi", "memory" | |
279 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
280 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
281 case A52_2F2R: |
3569 | 282 asm volatile( |
283 "movl $-1024, %%esi \n\t" | |
284 "movq magicF2W, %%mm7 \n\t" | |
285 "1: \n\t" | |
286 "movq (%1, %%esi), %%mm0 \n\t" | |
287 "movq 8(%1, %%esi), %%mm1 \n\t" | |
288 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
289 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
290 "psubd %%mm7, %%mm0 \n\t" | |
291 "psubd %%mm7, %%mm1 \n\t" | |
292 "psubd %%mm7, %%mm2 \n\t" | |
293 "psubd %%mm7, %%mm3 \n\t" | |
294 "packssdw %%mm1, %%mm0 \n\t" | |
295 "packssdw %%mm3, %%mm2 \n\t" | |
296 "movq 2048(%1, %%esi), %%mm3 \n\t" | |
297 "movq 2056(%1, %%esi), %%mm4 \n\t" | |
298 "movq 3072(%1, %%esi), %%mm5 \n\t" | |
299 "movq 3080(%1, %%esi), %%mm6 \n\t" | |
300 "psubd %%mm7, %%mm3 \n\t" | |
301 "psubd %%mm7, %%mm4 \n\t" | |
302 "psubd %%mm7, %%mm5 \n\t" | |
303 "psubd %%mm7, %%mm6 \n\t" | |
304 "packssdw %%mm4, %%mm3 \n\t" | |
305 "packssdw %%mm6, %%mm5 \n\t" | |
306 "movq %%mm0, %%mm1 \n\t" | |
307 "movq %%mm3, %%mm4 \n\t" | |
308 "punpcklwd %%mm2, %%mm0 \n\t" | |
309 "punpckhwd %%mm2, %%mm1 \n\t" | |
310 "punpcklwd %%mm5, %%mm3 \n\t" | |
311 "punpckhwd %%mm5, %%mm4 \n\t" | |
312 "movq %%mm0, %%mm2 \n\t" | |
313 "movq %%mm1, %%mm5 \n\t" | |
314 "punpckldq %%mm3, %%mm0 \n\t" | |
315 "punpckhdq %%mm3, %%mm2 \n\t" | |
316 "punpckldq %%mm4, %%mm1 \n\t" | |
317 "punpckhdq %%mm4, %%mm5 \n\t" | |
318 "movq %%mm0, (%0, %%esi,2) \n\t" | |
319 "movq %%mm2, 8(%0, %%esi,2) \n\t" | |
320 "movq %%mm1, 16(%0, %%esi,2) \n\t" | |
321 "movq %%mm5, 24(%0, %%esi,2) \n\t" | |
322 "addl $16, %%esi \n\t" | |
323 " jnz 1b \n\t" | |
324 "emms \n\t" | |
325 :: "r" (s16+1024), "r" (f+256) | |
326 :"%esi", "memory" | |
327 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
328 break; |
3653 | 329 case A52_3F2R: |
330 asm volatile( | |
331 "movl $-1024, %%esi \n\t" | |
332 "movq magicF2W, %%mm7 \n\t" | |
333 "1: \n\t" | |
334 "movd (%1, %%esi), %%mm0 \n\t" | |
335 "punpckldq 2048(%1, %%esi), %%mm0\n\t" | |
336 "movd 3072(%1, %%esi), %%mm1 \n\t" | |
337 "punpckldq 4096(%1, %%esi), %%mm1\n\t" | |
338 "movd 1024(%1, %%esi), %%mm2 \n\t" | |
339 "punpckldq 4(%1, %%esi), %%mm2 \n\t" | |
340 "movd 2052(%1, %%esi), %%mm3 \n\t" | |
341 "punpckldq 3076(%1, %%esi), %%mm3\n\t" | |
342 "movd 4100(%1, %%esi), %%mm4 \n\t" | |
343 "punpckldq 1028(%1, %%esi), %%mm4\n\t" | |
344 "movd 8(%1, %%esi), %%mm5 \n\t" | |
345 "punpckldq 2056(%1, %%esi), %%mm5\n\t" | |
346 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
347 "sarl $1, %%edi \n\t" | |
348 "psubd %%mm7, %%mm0 \n\t" | |
349 "psubd %%mm7, %%mm1 \n\t" | |
350 "psubd %%mm7, %%mm2 \n\t" | |
351 "psubd %%mm7, %%mm3 \n\t" | |
352 "psubd %%mm7, %%mm4 \n\t" | |
353 "psubd %%mm7, %%mm5 \n\t" | |
354 "packssdw %%mm1, %%mm0 \n\t" | |
355 "packssdw %%mm3, %%mm2 \n\t" | |
356 "packssdw %%mm5, %%mm4 \n\t" | |
357 "movq %%mm0, (%0, %%edi) \n\t" | |
358 "movq %%mm2, 8(%0, %%edi) \n\t" | |
359 "movq %%mm4, 16(%0, %%edi) \n\t" | |
360 | |
361 "movd 3080(%1, %%esi), %%mm0 \n\t" | |
362 "punpckldq 4104(%1, %%esi), %%mm0\n\t" | |
363 "movd 1032(%1, %%esi), %%mm1 \n\t" | |
364 "punpckldq 12(%1, %%esi), %%mm1\n\t" | |
365 "movd 2060(%1, %%esi), %%mm2 \n\t" | |
366 "punpckldq 3084(%1, %%esi), %%mm2\n\t" | |
367 "movd 4108(%1, %%esi), %%mm3 \n\t" | |
368 "punpckldq 1036(%1, %%esi), %%mm3\n\t" | |
369 "psubd %%mm7, %%mm0 \n\t" | |
370 "psubd %%mm7, %%mm1 \n\t" | |
371 "psubd %%mm7, %%mm2 \n\t" | |
372 "psubd %%mm7, %%mm3 \n\t" | |
373 "packssdw %%mm1, %%mm0 \n\t" | |
374 "packssdw %%mm3, %%mm2 \n\t" | |
375 "movq %%mm0, 24(%0, %%edi) \n\t" | |
376 "movq %%mm2, 32(%0, %%edi) \n\t" | |
377 | |
378 "addl $16, %%esi \n\t" | |
379 " jnz 1b \n\t" | |
380 "emms \n\t" | |
381 :: "r" (s16+1280), "r" (f+256) | |
382 :"%esi", "%edi", "memory" | |
383 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
384 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
385 case A52_MONO | A52_LFE: |
3569 | 386 asm volatile( |
387 "movl $-1024, %%esi \n\t" | |
388 "movq magicF2W, %%mm7 \n\t" | |
389 "pxor %%mm6, %%mm6 \n\t" | |
390 "1: \n\t" | |
391 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
392 "movq 1032(%1, %%esi), %%mm1 \n\t" | |
393 "movq (%1, %%esi), %%mm2 \n\t" | |
394 "movq 8(%1, %%esi), %%mm3 \n\t" | |
395 "psubd %%mm7, %%mm0 \n\t" | |
396 "psubd %%mm7, %%mm1 \n\t" | |
397 "psubd %%mm7, %%mm2 \n\t" | |
398 "psubd %%mm7, %%mm3 \n\t" | |
399 "packssdw %%mm1, %%mm0 \n\t" | |
400 "packssdw %%mm3, %%mm2 \n\t" | |
401 "movq %%mm0, %%mm1 \n\t" | |
402 "punpcklwd %%mm2, %%mm0 \n\t" | |
403 "punpckhwd %%mm2, %%mm1 \n\t" | |
404 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
405 "movq %%mm6, (%0, %%edi) \n\t" | |
406 "movd %%mm0, 8(%0, %%edi) \n\t" | |
407 "punpckhdq %%mm0, %%mm0 \n\t" | |
408 "movq %%mm6, 12(%0, %%edi) \n\t" | |
409 "movd %%mm0, 20(%0, %%edi) \n\t" | |
410 "movq %%mm6, 24(%0, %%edi) \n\t" | |
411 "movd %%mm1, 32(%0, %%edi) \n\t" | |
412 "punpckhdq %%mm1, %%mm1 \n\t" | |
413 "movq %%mm6, 36(%0, %%edi) \n\t" | |
414 "movd %%mm1, 44(%0, %%edi) \n\t" | |
415 "addl $16, %%esi \n\t" | |
416 " jnz 1b \n\t" | |
417 "emms \n\t" | |
418 :: "r" (s16+1536), "r" (f+256) | |
419 :"%esi", "%edi", "memory" | |
420 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
421 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
422 case A52_CHANNEL | A52_LFE: |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
423 case A52_STEREO | A52_LFE: |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
424 case A52_DOLBY | A52_LFE: |
3576 | 425 asm volatile( |
426 "movl $-1024, %%esi \n\t" | |
427 "movq magicF2W, %%mm7 \n\t" | |
428 "pxor %%mm6, %%mm6 \n\t" | |
429 "1: \n\t" | |
430 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
431 "movq 2048(%1, %%esi), %%mm1 \n\t" | |
432 "movq (%1, %%esi), %%mm5 \n\t" | |
433 "psubd %%mm7, %%mm0 \n\t" | |
434 "psubd %%mm7, %%mm1 \n\t" | |
435 "psubd %%mm7, %%mm5 \n\t" | |
436 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
437 | |
438 "pxor %%mm4, %%mm4 \n\t" | |
439 "packssdw %%mm5, %%mm0 \n\t" // FfAa | |
440 "packssdw %%mm4, %%mm1 \n\t" // 00Bb | |
441 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0 | |
442 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
443 "movq %%mm0, %%mm1 \n\t" // BAba | |
444 "punpckldq %%mm4, %%mm3 \n\t" // f0XX | |
445 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
446 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 | |
447 | |
448 "movq %%mm0, (%0, %%edi) \n\t" // 00ba | |
449 "punpckhdq %%mm4, %%mm0 \n\t" // F000 | |
450 "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0 | |
451 "movq %%mm0, 16(%0, %%edi) \n\t" // F000 | |
452 "addl $8, %%esi \n\t" | |
453 " jnz 1b \n\t" | |
454 "emms \n\t" | |
455 :: "r" (s16+1536), "r" (f+256) | |
456 :"%esi", "%edi", "memory" | |
457 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
458 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
459 case A52_3F | A52_LFE: |
3578 | 460 asm volatile( |
461 "movl $-1024, %%esi \n\t" | |
462 "movq magicF2W, %%mm7 \n\t" | |
463 "pxor %%mm6, %%mm6 \n\t" | |
464 "1: \n\t" | |
465 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
466 "movq 3072(%1, %%esi), %%mm1 \n\t" | |
467 "movq 2048(%1, %%esi), %%mm4 \n\t" | |
468 "movq (%1, %%esi), %%mm5 \n\t" | |
469 "psubd %%mm7, %%mm0 \n\t" | |
470 "psubd %%mm7, %%mm1 \n\t" | |
471 "psubd %%mm7, %%mm4 \n\t" | |
472 "psubd %%mm7, %%mm5 \n\t" | |
473 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
474 | |
475 "packssdw %%mm4, %%mm0 \n\t" // EeAa | |
476 "packssdw %%mm5, %%mm1 \n\t" // FfBb | |
477 "movq %%mm0, %%mm2 \n\t" // EeAa | |
478 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
479 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe | |
480 "movq %%mm0, %%mm1 \n\t" // BAba | |
481 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
482 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
483 | |
484 "movq %%mm0, (%0, %%edi) \n\t" | |
485 "punpckhdq %%mm2, %%mm0 \n\t" // FE00 | |
486 "punpckldq %%mm1, %%mm2 \n\t" // BAfe | |
487 "movq %%mm2, 8(%0, %%edi) \n\t" | |
488 "movq %%mm0, 16(%0, %%edi) \n\t" | |
489 "addl $8, %%esi \n\t" | |
490 " jnz 1b \n\t" | |
491 "emms \n\t" | |
492 :: "r" (s16+1536), "r" (f+256) | |
493 :"%esi", "%edi", "memory" | |
494 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
495 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
496 case A52_2F2R | A52_LFE: |
3577 | 497 asm volatile( |
498 "movl $-1024, %%esi \n\t" | |
499 "movq magicF2W, %%mm7 \n\t" | |
500 // "pxor %%mm6, %%mm6 \n\t" | |
501 "1: \n\t" | |
502 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
503 "movq 2048(%1, %%esi), %%mm1 \n\t" | |
504 "movq 3072(%1, %%esi), %%mm2 \n\t" | |
505 "movq 4096(%1, %%esi), %%mm3 \n\t" | |
506 "movq (%1, %%esi), %%mm5 \n\t" | |
507 "psubd %%mm7, %%mm0 \n\t" | |
508 "psubd %%mm7, %%mm1 \n\t" | |
509 "psubd %%mm7, %%mm2 \n\t" | |
510 "psubd %%mm7, %%mm3 \n\t" | |
511 "psubd %%mm7, %%mm5 \n\t" | |
512 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
513 | |
514 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
515 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
516 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
517 "movq %%mm0, %%mm2 \n\t" // CcAa | |
518 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
519 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
520 "pxor %%mm4, %%mm4 \n\t" // 0000 | |
521 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0 | |
522 "movq %%mm0, %%mm1 \n\t" // BAba | |
523 "movq %%mm4, %%mm3 \n\t" // F0f0 | |
524 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
525 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
526 "punpckldq %%mm1, %%mm4 \n\t" // BAf0 | |
527 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC | |
528 | |
529 "movq %%mm0, (%0, %%edi) \n\t" | |
530 "movq %%mm4, 8(%0, %%edi) \n\t" | |
531 "movq %%mm2, 16(%0, %%edi) \n\t" | |
532 "addl $8, %%esi \n\t" | |
533 " jnz 1b \n\t" | |
534 "emms \n\t" | |
535 :: "r" (s16+1536), "r" (f+256) | |
536 :"%esi", "%edi", "memory" | |
537 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
538 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
539 case A52_3F2R | A52_LFE: |
3575 | 540 asm volatile( |
541 "movl $-1024, %%esi \n\t" | |
542 "movq magicF2W, %%mm7 \n\t" | |
543 // "pxor %%mm6, %%mm6 \n\t" | |
544 "1: \n\t" | |
545 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
546 "movq 3072(%1, %%esi), %%mm1 \n\t" | |
547 "movq 4096(%1, %%esi), %%mm2 \n\t" | |
548 "movq 5120(%1, %%esi), %%mm3 \n\t" | |
549 "movq 2048(%1, %%esi), %%mm4 \n\t" | |
550 "movq (%1, %%esi), %%mm5 \n\t" | |
551 "psubd %%mm7, %%mm0 \n\t" | |
552 "psubd %%mm7, %%mm1 \n\t" | |
553 "psubd %%mm7, %%mm2 \n\t" | |
554 "psubd %%mm7, %%mm3 \n\t" | |
555 "psubd %%mm7, %%mm4 \n\t" | |
556 "psubd %%mm7, %%mm5 \n\t" | |
557 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
558 | |
559 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
560 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
561 "packssdw %%mm4, %%mm4 \n\t" // EeEe | |
562 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
563 "movq %%mm0, %%mm2 \n\t" // CcAa | |
564 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
565 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
566 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe | |
567 "movq %%mm0, %%mm1 \n\t" // BAba | |
568 "movq %%mm4, %%mm3 \n\t" // FEfe | |
569 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
570 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
571 "punpckldq %%mm1, %%mm4 \n\t" // BAfe | |
572 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC | |
573 | |
574 "movq %%mm0, (%0, %%edi) \n\t" | |
575 "movq %%mm4, 8(%0, %%edi) \n\t" | |
576 "movq %%mm2, 16(%0, %%edi) \n\t" | |
577 "addl $8, %%esi \n\t" | |
578 " jnz 1b \n\t" | |
579 "emms \n\t" | |
580 :: "r" (s16+1536), "r" (f+256) | |
581 :"%esi", "%edi", "memory" | |
582 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
583 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
584 } |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
585 return chans*256; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
586 } |
3626 | 587 #endif //arch_x86 |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
588 |
3626 | 589 void a52_resample_init(int _flags,int _chans){ |
590 chans=_chans; | |
591 flags=_flags; | |
592 | |
593 if(a52_resample==NULL) // only once please ;) | |
594 { | |
595 if(gCpuCaps.hasMMX) fprintf(stderr, "Using MMX optimized resampler\n"); | |
596 else fprintf(stderr, "No accelerated resampler found\n"); | |
597 } | |
598 | |
599 #ifdef ARCH_X86 | |
600 if(gCpuCaps.hasMMX) a52_resample= a52_resample_MMX; | |
601 #else | |
602 if(0); | |
603 #endif | |
604 else a52_resample= a52_resample_C; | |
605 } | |
606 |