Mercurial > mplayer.hg
annotate liba52/resample.c @ 3653:b11b15df02ed
3F2R sse optimized
author | michael |
---|---|
date | Sat, 22 Dec 2001 00:33:52 +0000 |
parents | e22ff7ebdc05 |
children | 33c3cff374a1 |
rev | line source |
---|---|
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
1 // this code comes from a52dec/libao/audio_out_oss.c |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
2 |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
3 // FIXME FIXME FIXME |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
4 |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
5 // a52_resample_init should find the requested converter (from type flags -> |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
6 // given number of channels) and set up some function pointers... |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
7 |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
8 // a52_resample() should do the conversion. |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
9 |
3569 | 10 // MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL) |
11 | |
12 /* optimization TODO / NOTES | |
13 movntq is slightly faster (0.5% with the current test.c benchmark) | |
14 (but that's just test.c, so this needs to be tested in reality) | |
15 and it would mean (C / MMX2 / MMX / 3DNOW) versions | |
16 */ | |
17 | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
18 #include <inttypes.h> |
3626 | 19 #include <stdio.h> |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
20 #include "a52.h" |
3567 | 21 #include "../config.h" |
3626 | 22 #include "../cpudetect.h" |
3567 | 23 |
/*
 * Currently selected converter.  It is NULL until a52_resample_init() has
 * been called, which points it at either the C or the MMX implementation.
 */
int (* a52_resample) (float * _f, int16_t * s16)=NULL;

#ifdef ARCH_X86
/*
 * 64-bit constants for the MMX converter.  They are referenced only by
 * symbol name from inside inline-asm strings, which the compiler does not
 * parse; without the "used" attribute it is free to treat them as unused
 * statics and drop them, leaving the asm with undefined symbols.
 *
 * magicF2W : the value 0x43c00000 in both 32-bit lanes; subtracted from the
 *            raw sample bits before packing to 16 bit.
 * wm1010   : keeps 16-bit words 1 and 3, clears words 0 and 2.
 * wm0101   : keeps 16-bit words 0 and 2, clears words 1 and 3.
 * wm1100   : keeps the upper 32 bits, clears the lower 32 bits.
 */
static uint64_t __attribute__((used, aligned(8))) magicF2W= 0x43c0000043c00000ULL;
static uint64_t __attribute__((used, aligned(8))) wm1010= 0xFFFF0000FFFF0000ULL;
static uint64_t __attribute__((used, aligned(8))) wm0101= 0x0000FFFF0000FFFFULL;
static uint64_t __attribute__((used, aligned(8))) wm1100= 0xFFFFFFFF00000000ULL;
#endif
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
32 |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
/*
 * Turn the raw 32-bit pattern of one sample into a signed 16-bit value.
 *
 * The sample is handled as an integer: 0x43c00000 maps to 0 and every step
 * of one in the bit pattern is one step in the output.  Patterns below
 * 0x43bf8000 clamp to -32768, patterns above 0x43c07fff clamp to 32767.
 * (0x43c00000 is the IEEE-754 encoding of 384.0f, so the input is
 * presumably a float that was biased by 384 -- confirm against the decoder
 * setup.)
 */
static inline int16_t convert (int32_t i)
{
    /* Range checks come first so the subtraction below can never overflow. */
    if (i < 0x43bf8000)
	return -32768;
    if (i > 0x43c07fff)
	return 32767;
    return (int16_t) (i - 0x43c00000);
}
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
42 |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
43 static int chans=2; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
44 static int flags=0; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
45 |
3626 | 46 int a52_resample_C(float * _f, int16_t * s16) |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
47 { |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
48 int i; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
49 int32_t * f = (int32_t *) _f; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
50 |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
51 switch (flags) { |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
52 case A52_MONO: |
3626 | 53 for (i = 0; i < 256; i++) { |
54 s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0; | |
55 s16[5*i+4] = convert (f[i]); | |
56 } | |
57 break; | |
58 case A52_CHANNEL: | |
59 case A52_STEREO: | |
60 case A52_DOLBY: | |
61 for (i = 0; i < 256; i++) { | |
62 s16[2*i] = convert (f[i]); | |
63 s16[2*i+1] = convert (f[i+256]); | |
64 } | |
65 break; | |
66 case A52_3F: | |
67 for (i = 0; i < 256; i++) { | |
68 s16[5*i] = convert (f[i]); | |
69 s16[5*i+1] = convert (f[i+512]); | |
70 s16[5*i+2] = s16[5*i+3] = 0; | |
71 s16[5*i+4] = convert (f[i+256]); | |
72 } | |
73 break; | |
74 case A52_2F2R: | |
75 for (i = 0; i < 256; i++) { | |
76 s16[4*i] = convert (f[i]); | |
77 s16[4*i+1] = convert (f[i+256]); | |
78 s16[4*i+2] = convert (f[i+512]); | |
79 s16[4*i+3] = convert (f[i+768]); | |
80 } | |
81 break; | |
82 case A52_3F2R: | |
83 for (i = 0; i < 256; i++) { | |
84 s16[5*i] = convert (f[i]); | |
85 s16[5*i+1] = convert (f[i+512]); | |
86 s16[5*i+2] = convert (f[i+768]); | |
87 s16[5*i+3] = convert (f[i+1024]); | |
88 s16[5*i+4] = convert (f[i+256]); | |
89 } | |
90 break; | |
91 case A52_MONO | A52_LFE: | |
92 for (i = 0; i < 256; i++) { | |
93 s16[6*i] = s16[6*i+1] = s16[6*i+2] = s16[6*i+3] = 0; | |
94 s16[6*i+4] = convert (f[i+256]); | |
95 s16[6*i+5] = convert (f[i]); | |
96 } | |
97 break; | |
98 case A52_CHANNEL | A52_LFE: | |
99 case A52_STEREO | A52_LFE: | |
100 case A52_DOLBY | A52_LFE: | |
101 for (i = 0; i < 256; i++) { | |
102 s16[6*i] = convert (f[i+256]); | |
103 s16[6*i+1] = convert (f[i+512]); | |
104 s16[6*i+2] = s16[6*i+3] = s16[6*i+4] = 0; | |
105 s16[6*i+5] = convert (f[i]); | |
106 } | |
107 break; | |
108 case A52_3F | A52_LFE: | |
109 for (i = 0; i < 256; i++) { | |
110 s16[6*i] = convert (f[i+256]); | |
111 s16[6*i+1] = convert (f[i+768]); | |
112 s16[6*i+2] = s16[6*i+3] = 0; | |
113 s16[6*i+4] = convert (f[i+512]); | |
114 s16[6*i+5] = convert (f[i]); | |
115 } | |
116 break; | |
117 case A52_2F2R | A52_LFE: | |
118 for (i = 0; i < 256; i++) { | |
119 s16[6*i] = convert (f[i+256]); | |
120 s16[6*i+1] = convert (f[i+512]); | |
121 s16[6*i+2] = convert (f[i+768]); | |
122 s16[6*i+3] = convert (f[i+1024]); | |
123 s16[6*i+4] = 0; | |
124 s16[6*i+5] = convert (f[i]); | |
125 } | |
126 break; | |
127 case A52_3F2R | A52_LFE: | |
128 for (i = 0; i < 256; i++) { | |
129 s16[6*i] = convert (f[i+256]); | |
130 s16[6*i+1] = convert (f[i+768]); | |
131 s16[6*i+2] = convert (f[i+1024]); | |
132 s16[6*i+3] = convert (f[i+1280]); | |
133 s16[6*i+4] = convert (f[i+512]); | |
134 s16[6*i+5] = convert (f[i]); | |
135 } | |
136 break; | |
137 } | |
138 return chans*256; | |
139 } | |
140 | |
141 #ifdef ARCH_X86 | |
142 int a52_resample_MMX(float * _f, int16_t * s16) | |
143 { | |
144 int i; | |
145 int32_t * f = (int32_t *) _f; | |
146 | |
147 switch (flags) { | |
148 case A52_MONO: | |
3574 | 149 asm volatile( |
150 "movl $-512, %%esi \n\t" | |
151 "movq magicF2W, %%mm7 \n\t" | |
152 "movq wm1100, %%mm3 \n\t" | |
153 "movq wm0101, %%mm4 \n\t" | |
154 "movq wm1010, %%mm5 \n\t" | |
155 "pxor %%mm6, %%mm6 \n\t" | |
156 "1: \n\t" | |
157 "movq (%1, %%esi, 2), %%mm0 \n\t" | |
158 "movq 8(%1, %%esi, 2), %%mm1 \n\t" | |
159 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
160 "psubd %%mm7, %%mm0 \n\t" | |
161 "psubd %%mm7, %%mm1 \n\t" | |
162 "packssdw %%mm1, %%mm0 \n\t" | |
163 "movq %%mm0, %%mm1 \n\t" | |
164 "pand %%mm4, %%mm0 \n\t" | |
165 "pand %%mm5, %%mm1 \n\t" | |
166 "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0 | |
167 "movd %%mm0, 8(%0, %%edi) \n\t" // A 0 | |
168 "pand %%mm3, %%mm0 \n\t" | |
169 "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0 | |
170 "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B | |
171 "pand %%mm3, %%mm1 \n\t" | |
172 "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0 | |
173 "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0 | |
174 "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 B | |
175 "addl $8, %%esi \n\t" | |
176 " jnz 1b \n\t" | |
177 "emms \n\t" | |
178 :: "r" (s16+1280), "r" (f+256) | |
179 :"%esi", "%edi", "memory" | |
180 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
181 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
182 case A52_CHANNEL: |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
183 case A52_STEREO: |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
184 case A52_DOLBY: |
3567 | 185 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it |
186 #ifdef HAVE_SSE | |
187 asm volatile( | |
188 "movl $-1024, %%esi \n\t" | |
189 "1: \n\t" | |
190 "cvtps2pi (%1, %%esi), %%mm0 \n\t" | |
191 "cvtps2pi 1024(%1, %%esi), %%mm2\n\t" | |
192 "movq %%mm0, %%mm1 \n\t" | |
193 "punpcklwd %%mm2, %%mm0 \n\t" | |
194 "punpckhwd %%mm2, %%mm1 \n\t" | |
195 "movq %%mm0, (%0, %%esi) \n\t" | |
196 "movq %%mm1, 8(%0, %%esi) \n\t" | |
197 "addl $16, %%esi \n\t" | |
198 " jnz 1b \n\t" | |
199 "emms \n\t" | |
200 :: "r" (s16+512), "r" (f+256) | |
201 :"%esi", "memory" | |
202 );*/ | |
203 asm volatile( | |
204 "movl $-1024, %%esi \n\t" | |
205 "movq magicF2W, %%mm7 \n\t" | |
206 "1: \n\t" | |
207 "movq (%1, %%esi), %%mm0 \n\t" | |
208 "movq 8(%1, %%esi), %%mm1 \n\t" | |
209 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
210 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
211 "psubd %%mm7, %%mm0 \n\t" | |
212 "psubd %%mm7, %%mm1 \n\t" | |
213 "psubd %%mm7, %%mm2 \n\t" | |
214 "psubd %%mm7, %%mm3 \n\t" | |
215 "packssdw %%mm1, %%mm0 \n\t" | |
216 "packssdw %%mm3, %%mm2 \n\t" | |
217 "movq %%mm0, %%mm1 \n\t" | |
218 "punpcklwd %%mm2, %%mm0 \n\t" | |
219 "punpckhwd %%mm2, %%mm1 \n\t" | |
220 "movq %%mm0, (%0, %%esi) \n\t" | |
221 "movq %%mm1, 8(%0, %%esi) \n\t" | |
222 "addl $16, %%esi \n\t" | |
223 " jnz 1b \n\t" | |
224 "emms \n\t" | |
225 :: "r" (s16+512), "r" (f+256) | |
226 :"%esi", "memory" | |
227 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
228 break; |
3626 | 229 case A52_3F: //FIXME Optimize |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
230 for (i = 0; i < 256; i++) { |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
231 s16[5*i] = convert (f[i]); |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
232 s16[5*i+1] = convert (f[i+512]); |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
233 s16[5*i+2] = s16[5*i+3] = 0; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
234 s16[5*i+4] = convert (f[i+256]); |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
235 } |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
236 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
237 case A52_2F2R: |
3569 | 238 asm volatile( |
239 "movl $-1024, %%esi \n\t" | |
240 "movq magicF2W, %%mm7 \n\t" | |
241 "1: \n\t" | |
242 "movq (%1, %%esi), %%mm0 \n\t" | |
243 "movq 8(%1, %%esi), %%mm1 \n\t" | |
244 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
245 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
246 "psubd %%mm7, %%mm0 \n\t" | |
247 "psubd %%mm7, %%mm1 \n\t" | |
248 "psubd %%mm7, %%mm2 \n\t" | |
249 "psubd %%mm7, %%mm3 \n\t" | |
250 "packssdw %%mm1, %%mm0 \n\t" | |
251 "packssdw %%mm3, %%mm2 \n\t" | |
252 "movq 2048(%1, %%esi), %%mm3 \n\t" | |
253 "movq 2056(%1, %%esi), %%mm4 \n\t" | |
254 "movq 3072(%1, %%esi), %%mm5 \n\t" | |
255 "movq 3080(%1, %%esi), %%mm6 \n\t" | |
256 "psubd %%mm7, %%mm3 \n\t" | |
257 "psubd %%mm7, %%mm4 \n\t" | |
258 "psubd %%mm7, %%mm5 \n\t" | |
259 "psubd %%mm7, %%mm6 \n\t" | |
260 "packssdw %%mm4, %%mm3 \n\t" | |
261 "packssdw %%mm6, %%mm5 \n\t" | |
262 "movq %%mm0, %%mm1 \n\t" | |
263 "movq %%mm3, %%mm4 \n\t" | |
264 "punpcklwd %%mm2, %%mm0 \n\t" | |
265 "punpckhwd %%mm2, %%mm1 \n\t" | |
266 "punpcklwd %%mm5, %%mm3 \n\t" | |
267 "punpckhwd %%mm5, %%mm4 \n\t" | |
268 "movq %%mm0, %%mm2 \n\t" | |
269 "movq %%mm1, %%mm5 \n\t" | |
270 "punpckldq %%mm3, %%mm0 \n\t" | |
271 "punpckhdq %%mm3, %%mm2 \n\t" | |
272 "punpckldq %%mm4, %%mm1 \n\t" | |
273 "punpckhdq %%mm4, %%mm5 \n\t" | |
274 "movq %%mm0, (%0, %%esi,2) \n\t" | |
275 "movq %%mm2, 8(%0, %%esi,2) \n\t" | |
276 "movq %%mm1, 16(%0, %%esi,2) \n\t" | |
277 "movq %%mm5, 24(%0, %%esi,2) \n\t" | |
278 "addl $16, %%esi \n\t" | |
279 " jnz 1b \n\t" | |
280 "emms \n\t" | |
281 :: "r" (s16+1024), "r" (f+256) | |
282 :"%esi", "memory" | |
283 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
284 break; |
3653 | 285 case A52_3F2R: |
286 asm volatile( | |
287 "movl $-1024, %%esi \n\t" | |
288 "movq magicF2W, %%mm7 \n\t" | |
289 "1: \n\t" | |
290 "movd (%1, %%esi), %%mm0 \n\t" | |
291 "punpckldq 2048(%1, %%esi), %%mm0\n\t" | |
292 "movd 3072(%1, %%esi), %%mm1 \n\t" | |
293 "punpckldq 4096(%1, %%esi), %%mm1\n\t" | |
294 "movd 1024(%1, %%esi), %%mm2 \n\t" | |
295 "punpckldq 4(%1, %%esi), %%mm2 \n\t" | |
296 "movd 2052(%1, %%esi), %%mm3 \n\t" | |
297 "punpckldq 3076(%1, %%esi), %%mm3\n\t" | |
298 "movd 4100(%1, %%esi), %%mm4 \n\t" | |
299 "punpckldq 1028(%1, %%esi), %%mm4\n\t" | |
300 "movd 8(%1, %%esi), %%mm5 \n\t" | |
301 "punpckldq 2056(%1, %%esi), %%mm5\n\t" | |
302 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
303 "sarl $1, %%edi \n\t" | |
304 "psubd %%mm7, %%mm0 \n\t" | |
305 "psubd %%mm7, %%mm1 \n\t" | |
306 "psubd %%mm7, %%mm2 \n\t" | |
307 "psubd %%mm7, %%mm3 \n\t" | |
308 "psubd %%mm7, %%mm4 \n\t" | |
309 "psubd %%mm7, %%mm5 \n\t" | |
310 "packssdw %%mm1, %%mm0 \n\t" | |
311 "packssdw %%mm3, %%mm2 \n\t" | |
312 "packssdw %%mm5, %%mm4 \n\t" | |
313 "movq %%mm0, (%0, %%edi) \n\t" | |
314 "movq %%mm2, 8(%0, %%edi) \n\t" | |
315 "movq %%mm4, 16(%0, %%edi) \n\t" | |
316 | |
317 "movd 3080(%1, %%esi), %%mm0 \n\t" | |
318 "punpckldq 4104(%1, %%esi), %%mm0\n\t" | |
319 "movd 1032(%1, %%esi), %%mm1 \n\t" | |
320 "punpckldq 12(%1, %%esi), %%mm1\n\t" | |
321 "movd 2060(%1, %%esi), %%mm2 \n\t" | |
322 "punpckldq 3084(%1, %%esi), %%mm2\n\t" | |
323 "movd 4108(%1, %%esi), %%mm3 \n\t" | |
324 "punpckldq 1036(%1, %%esi), %%mm3\n\t" | |
325 "psubd %%mm7, %%mm0 \n\t" | |
326 "psubd %%mm7, %%mm1 \n\t" | |
327 "psubd %%mm7, %%mm2 \n\t" | |
328 "psubd %%mm7, %%mm3 \n\t" | |
329 "packssdw %%mm1, %%mm0 \n\t" | |
330 "packssdw %%mm3, %%mm2 \n\t" | |
331 "packssdw %%mm5, %%mm4 \n\t" | |
332 "movq %%mm0, 24(%0, %%edi) \n\t" | |
333 "movq %%mm2, 32(%0, %%edi) \n\t" | |
334 "movq %%mm4, 40(%0, %%edi) \n\t" | |
335 | |
336 "addl $16, %%esi \n\t" | |
337 " jnz 1b \n\t" | |
338 "emms \n\t" | |
339 :: "r" (s16+1280), "r" (f+256) | |
340 :"%esi", "%edi", "memory" | |
341 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
342 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
343 case A52_MONO | A52_LFE: |
3569 | 344 asm volatile( |
345 "movl $-1024, %%esi \n\t" | |
346 "movq magicF2W, %%mm7 \n\t" | |
347 "pxor %%mm6, %%mm6 \n\t" | |
348 "1: \n\t" | |
349 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
350 "movq 1032(%1, %%esi), %%mm1 \n\t" | |
351 "movq (%1, %%esi), %%mm2 \n\t" | |
352 "movq 8(%1, %%esi), %%mm3 \n\t" | |
353 "psubd %%mm7, %%mm0 \n\t" | |
354 "psubd %%mm7, %%mm1 \n\t" | |
355 "psubd %%mm7, %%mm2 \n\t" | |
356 "psubd %%mm7, %%mm3 \n\t" | |
357 "packssdw %%mm1, %%mm0 \n\t" | |
358 "packssdw %%mm3, %%mm2 \n\t" | |
359 "movq %%mm0, %%mm1 \n\t" | |
360 "punpcklwd %%mm2, %%mm0 \n\t" | |
361 "punpckhwd %%mm2, %%mm1 \n\t" | |
362 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
363 "movq %%mm6, (%0, %%edi) \n\t" | |
364 "movd %%mm0, 8(%0, %%edi) \n\t" | |
365 "punpckhdq %%mm0, %%mm0 \n\t" | |
366 "movq %%mm6, 12(%0, %%edi) \n\t" | |
367 "movd %%mm0, 20(%0, %%edi) \n\t" | |
368 "movq %%mm6, 24(%0, %%edi) \n\t" | |
369 "movd %%mm1, 32(%0, %%edi) \n\t" | |
370 "punpckhdq %%mm1, %%mm1 \n\t" | |
371 "movq %%mm6, 36(%0, %%edi) \n\t" | |
372 "movd %%mm1, 44(%0, %%edi) \n\t" | |
373 "addl $16, %%esi \n\t" | |
374 " jnz 1b \n\t" | |
375 "emms \n\t" | |
376 :: "r" (s16+1536), "r" (f+256) | |
377 :"%esi", "%edi", "memory" | |
378 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
379 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
380 case A52_CHANNEL | A52_LFE: |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
381 case A52_STEREO | A52_LFE: |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
382 case A52_DOLBY | A52_LFE: |
3576 | 383 asm volatile( |
384 "movl $-1024, %%esi \n\t" | |
385 "movq magicF2W, %%mm7 \n\t" | |
386 "pxor %%mm6, %%mm6 \n\t" | |
387 "1: \n\t" | |
388 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
389 "movq 2048(%1, %%esi), %%mm1 \n\t" | |
390 "movq (%1, %%esi), %%mm5 \n\t" | |
391 "psubd %%mm7, %%mm0 \n\t" | |
392 "psubd %%mm7, %%mm1 \n\t" | |
393 "psubd %%mm7, %%mm5 \n\t" | |
394 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
395 | |
396 "pxor %%mm4, %%mm4 \n\t" | |
397 "packssdw %%mm5, %%mm0 \n\t" // FfAa | |
398 "packssdw %%mm4, %%mm1 \n\t" // 00Bb | |
399 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0 | |
400 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
401 "movq %%mm0, %%mm1 \n\t" // BAba | |
402 "punpckldq %%mm4, %%mm3 \n\t" // f0XX | |
403 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
404 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 | |
405 | |
406 "movq %%mm0, (%0, %%edi) \n\t" // 00ba | |
407 "punpckhdq %%mm4, %%mm0 \n\t" // F000 | |
408 "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0 | |
409 "movq %%mm0, 16(%0, %%edi) \n\t" // F000 | |
410 "addl $8, %%esi \n\t" | |
411 " jnz 1b \n\t" | |
412 "emms \n\t" | |
413 :: "r" (s16+1536), "r" (f+256) | |
414 :"%esi", "%edi", "memory" | |
415 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
416 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
417 case A52_3F | A52_LFE: |
3578 | 418 asm volatile( |
419 "movl $-1024, %%esi \n\t" | |
420 "movq magicF2W, %%mm7 \n\t" | |
421 "pxor %%mm6, %%mm6 \n\t" | |
422 "1: \n\t" | |
423 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
424 "movq 3072(%1, %%esi), %%mm1 \n\t" | |
425 "movq 2048(%1, %%esi), %%mm4 \n\t" | |
426 "movq (%1, %%esi), %%mm5 \n\t" | |
427 "psubd %%mm7, %%mm0 \n\t" | |
428 "psubd %%mm7, %%mm1 \n\t" | |
429 "psubd %%mm7, %%mm4 \n\t" | |
430 "psubd %%mm7, %%mm5 \n\t" | |
431 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
432 | |
433 "packssdw %%mm4, %%mm0 \n\t" // EeAa | |
434 "packssdw %%mm5, %%mm1 \n\t" // FfBb | |
435 "movq %%mm0, %%mm2 \n\t" // EeAa | |
436 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
437 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe | |
438 "movq %%mm0, %%mm1 \n\t" // BAba | |
439 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
440 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
441 | |
442 "movq %%mm0, (%0, %%edi) \n\t" | |
443 "punpckhdq %%mm2, %%mm0 \n\t" // FE00 | |
444 "punpckldq %%mm1, %%mm2 \n\t" // BAfe | |
445 "movq %%mm2, 8(%0, %%edi) \n\t" | |
446 "movq %%mm0, 16(%0, %%edi) \n\t" | |
447 "addl $8, %%esi \n\t" | |
448 " jnz 1b \n\t" | |
449 "emms \n\t" | |
450 :: "r" (s16+1536), "r" (f+256) | |
451 :"%esi", "%edi", "memory" | |
452 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
453 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
454 case A52_2F2R | A52_LFE: |
3577 | 455 asm volatile( |
456 "movl $-1024, %%esi \n\t" | |
457 "movq magicF2W, %%mm7 \n\t" | |
458 // "pxor %%mm6, %%mm6 \n\t" | |
459 "1: \n\t" | |
460 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
461 "movq 2048(%1, %%esi), %%mm1 \n\t" | |
462 "movq 3072(%1, %%esi), %%mm2 \n\t" | |
463 "movq 4096(%1, %%esi), %%mm3 \n\t" | |
464 "movq (%1, %%esi), %%mm5 \n\t" | |
465 "psubd %%mm7, %%mm0 \n\t" | |
466 "psubd %%mm7, %%mm1 \n\t" | |
467 "psubd %%mm7, %%mm2 \n\t" | |
468 "psubd %%mm7, %%mm3 \n\t" | |
469 "psubd %%mm7, %%mm5 \n\t" | |
470 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
471 | |
472 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
473 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
474 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
475 "movq %%mm0, %%mm2 \n\t" // CcAa | |
476 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
477 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
478 "pxor %%mm4, %%mm4 \n\t" // 0000 | |
479 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0 | |
480 "movq %%mm0, %%mm1 \n\t" // BAba | |
481 "movq %%mm4, %%mm3 \n\t" // F0f0 | |
482 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
483 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
484 "punpckldq %%mm1, %%mm4 \n\t" // BAf0 | |
485 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC | |
486 | |
487 "movq %%mm0, (%0, %%edi) \n\t" | |
488 "movq %%mm4, 8(%0, %%edi) \n\t" | |
489 "movq %%mm2, 16(%0, %%edi) \n\t" | |
490 "addl $8, %%esi \n\t" | |
491 " jnz 1b \n\t" | |
492 "emms \n\t" | |
493 :: "r" (s16+1536), "r" (f+256) | |
494 :"%esi", "%edi", "memory" | |
495 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
496 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
497 case A52_3F2R | A52_LFE: |
3575 | 498 asm volatile( |
499 "movl $-1024, %%esi \n\t" | |
500 "movq magicF2W, %%mm7 \n\t" | |
501 // "pxor %%mm6, %%mm6 \n\t" | |
502 "1: \n\t" | |
503 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
504 "movq 3072(%1, %%esi), %%mm1 \n\t" | |
505 "movq 4096(%1, %%esi), %%mm2 \n\t" | |
506 "movq 5120(%1, %%esi), %%mm3 \n\t" | |
507 "movq 2048(%1, %%esi), %%mm4 \n\t" | |
508 "movq (%1, %%esi), %%mm5 \n\t" | |
509 "psubd %%mm7, %%mm0 \n\t" | |
510 "psubd %%mm7, %%mm1 \n\t" | |
511 "psubd %%mm7, %%mm2 \n\t" | |
512 "psubd %%mm7, %%mm3 \n\t" | |
513 "psubd %%mm7, %%mm4 \n\t" | |
514 "psubd %%mm7, %%mm5 \n\t" | |
515 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
516 | |
517 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
518 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
519 "packssdw %%mm4, %%mm4 \n\t" // EeEe | |
520 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
521 "movq %%mm0, %%mm2 \n\t" // CcAa | |
522 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
523 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
524 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe | |
525 "movq %%mm0, %%mm1 \n\t" // BAba | |
526 "movq %%mm4, %%mm3 \n\t" // FEfe | |
527 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
528 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
529 "punpckldq %%mm1, %%mm4 \n\t" // BAfe | |
530 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC | |
531 | |
532 "movq %%mm0, (%0, %%edi) \n\t" | |
533 "movq %%mm4, 8(%0, %%edi) \n\t" | |
534 "movq %%mm2, 16(%0, %%edi) \n\t" | |
535 "addl $8, %%esi \n\t" | |
536 " jnz 1b \n\t" | |
537 "emms \n\t" | |
538 :: "r" (s16+1536), "r" (f+256) | |
539 :"%esi", "%edi", "memory" | |
540 ); | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
541 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
542 } |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
543 return chans*256; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
544 } |
3626 | 545 #endif //arch_x86 |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
546 |
3626 | 547 void a52_resample_init(int _flags,int _chans){ |
548 chans=_chans; | |
549 flags=_flags; | |
550 | |
551 if(a52_resample==NULL) // only once please ;) | |
552 { | |
553 if(gCpuCaps.hasMMX) fprintf(stderr, "Using MMX optimized resampler\n"); | |
554 else fprintf(stderr, "No accelerated resampler found\n"); | |
555 } | |
556 | |
557 #ifdef ARCH_X86 | |
558 if(gCpuCaps.hasMMX) a52_resample= a52_resample_MMX; | |
559 #else | |
560 if(0); | |
561 #endif | |
562 else a52_resample= a52_resample_C; | |
563 } | |
564 |