Mercurial > mplayer.hg
annotate liba52/resample_mmx.c @ 8881:1e40d4a2466f
Function DVDOpenVOBPath only decrypts first VOB file and since each VOB file has
separate structure for title key, all other title keys remains empty. My
fix is very conservative and simply tries to find title key for every VOB file.
Tomas Hurka <tom@hukatronic.cz>
author | arpi |
---|---|
date | Fri, 10 Jan 2003 22:48:49 +0000 |
parents | 9fc45fe0d444 |
children | f881c918739b |
rev | line source |
---|---|
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
1 |
3569 | 2 // MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL) |
3 | |
4 /* optimization TODO / NOTES | |
5 movntq is slightly faster (0.5% with the current test.c benchmark) | |
6 (but that's just test.c, so that needs to be tested in reality) | |
7 and it would mean (C / MMX2 / MMX / 3DNOW) versions | |
8 */ | |
9 | |
// magicF2W holds 0x43c00000, the IEEE-754 bit pattern of 384.0f, in both 32-bit halves.
// The routines below subtract it (psubd) from raw float bit patterns before packssdw;
// for a float v in [256.0, 512.0) that integer difference equals (v - 384) * 32768.
// NOTE(review): presumably the decoder output is biased by 384 so this yields the
// 16-bit sample directly - confirm against the caller.
3574 | 10 static uint64_t __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL; |
// Word masks used with pand (words numbered 0..3 from the least significant end):
// wm1010 keeps 16-bit words 1 and 3.
11 static uint64_t __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL; | |
// wm0101 keeps 16-bit words 0 and 2.
12 static uint64_t __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL; | |
// wm1100 keeps 16-bit words 2 and 3 (the upper dword).
13 static uint64_t __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL; | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
14 |
/*
 * a52_resample_MONO_to_5_MMX: convert the 256 floats at _f[0..255] to int16 and
 * write 256 frames of five int16 words each to s16[0..1279].
 *
 * Every output frame is {0, 0, 0, 0, sample}: the converted sample is stored in
 * the fifth word and the other four words are zero.
 *
 * Conversion: the raw 32-bit pattern of each float (accessed through the int32_t
 * alias f) has magicF2W subtracted as an integer (psubd); the differences are
 * packed to int16 with signed saturation (packssdw).
 * NOTE(review): this is only a meaningful sample if the floats are biased so that
 * their bit patterns lie close to magicF2W - confirm against the caller.
 * NOTE(review): %esi/%edi are used as 32-bit index registers, so this presumably
 * targets 32-bit x86 only.
 *
 * Returns 5*256, the number of int16 words written.
 */
3909 | 15 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
16 int32_t * f = (int32_t *) _f; |
3574 | 17 asm volatile(
// esi counts up from -512 to 0: it is scaled by 2 to index input bytes and by 5
// (edi = 5*esi) to index output bytes, both relative to the end of the buffers.
18 "movl $-512, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
19 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
20 "movq "MANGLE(wm1100)", %%mm3 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
21 "movq "MANGLE(wm0101)", %%mm4 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
22 "movq "MANGLE(wm1010)", %%mm5 \n\t" |
3574 | 23 "pxor %%mm6, %%mm6 \n\t" |
// Loop body: 4 input floats (16 bytes) in, 4 frames (40 bytes) out.
24 "1: \n\t" | |
25 "movq (%1, %%esi, 2), %%mm0 \n\t" | |
26 "movq 8(%1, %%esi, 2), %%mm1 \n\t" | |
27 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
28 "psubd %%mm7, %%mm0 \n\t" | |
29 "psubd %%mm7, %%mm1 \n\t" | |
30 "packssdw %%mm1, %%mm0 \n\t" | |
// mm0 now holds the four int16 samples A B C D (A in the lowest word);
// mm0 keeps A and C, mm1 keeps B and D, so each store below places one sample
// next to zero words.
31 "movq %%mm0, %%mm1 \n\t" | |
32 "pand %%mm4, %%mm0 \n\t" | |
33 "pand %%mm5, %%mm1 \n\t" | |
34 "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0 | |
35 "movd %%mm0, 8(%0, %%edi) \n\t" // A 0 | |
36 "pand %%mm3, %%mm0 \n\t" | |
37 "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0 | |
38 "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B | |
39 "pand %%mm3, %%mm1 \n\t" | |
40 "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0 | |
41 "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0 | |
42 "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 D | |
43 "addl $8, %%esi \n\t" | |
44 " jnz 1b \n\t" | |
45 "emms \n\t" | |
46 :: "r" (s16+1280), "r" (f+256) | |
47 :"%esi", "%edi", "memory" | |
48 ); | |
3909 | 49 return 5*256; |
50 } | |
51 | |
/*
 * a52_resample_STEREO_to_2_MMX: convert two planes of 256 floats each
 * (_f[0..255] and _f[256..511]) to int16 and interleave them into
 * s16[0..511] as 256 frames of {plane0, plane1}.
 *
 * Conversion: magicF2W is subtracted as an integer from each raw float bit
 * pattern (psubd) and the results are packed to int16 with signed saturation
 * (packssdw).
 * NOTE(review): this presumably relies on the floats being biased to lie near
 * magicF2W - confirm against the caller.
 * NOTE(review): %esi is used as a 32-bit index register, so this presumably
 * targets 32-bit x86 only.
 *
 * Returns 2*256, the number of int16 words written.
 */
52 static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ | |
53 int32_t * f = (int32_t *) _f; | |
3567 | 54 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it |
55 #ifdef HAVE_SSE | |
56 asm volatile( | |
57 "movl $-1024, %%esi \n\t" | |
58 "1: \n\t" | |
59 "cvtps2pi (%1, %%esi), %%mm0 \n\t" | |
60 "cvtps2pi 1024(%1, %%esi), %%mm2\n\t" | |
61 "movq %%mm0, %%mm1 \n\t" | |
62 "punpcklwd %%mm2, %%mm0 \n\t" | |
63 "punpckhwd %%mm2, %%mm1 \n\t" | |
64 "movq %%mm0, (%0, %%esi) \n\t" | |
65 "movq %%mm1, 8(%0, %%esi) \n\t" | |
66 "addl $16, %%esi \n\t" | |
67 " jnz 1b \n\t" | |
68 "emms \n\t" | |
69 :: "r" (s16+512), "r" (f+256) | |
70 :"%esi", "memory" | |
71 );*/ | |
72 asm volatile( | |
// esi is a negative byte offset running from -1024 up to 0; the same offset
// indexes both the input plane and the output because 16 input bytes per plane
// produce 16 output bytes.
73 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
74 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3567 | 75 "1: \n\t" |
76 "movq (%1, %%esi), %%mm0 \n\t" | |
77 "movq 8(%1, %%esi), %%mm1 \n\t" | |
78 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
79 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
80 "psubd %%mm7, %%mm0 \n\t" | |
81 "psubd %%mm7, %%mm1 \n\t" | |
82 "psubd %%mm7, %%mm2 \n\t" | |
83 "psubd %%mm7, %%mm3 \n\t" | |
84 "packssdw %%mm1, %%mm0 \n\t" | |
85 "packssdw %%mm3, %%mm2 \n\t" | |
// mm0 = four plane-0 samples, mm2 = four plane-1 samples; interleave them word by word.
86 "movq %%mm0, %%mm1 \n\t" | |
87 "punpcklwd %%mm2, %%mm0 \n\t" | |
88 "punpckhwd %%mm2, %%mm1 \n\t" | |
89 "movq %%mm0, (%0, %%esi) \n\t" | |
90 "movq %%mm1, 8(%0, %%esi) \n\t" | |
91 "addl $16, %%esi \n\t" | |
92 " jnz 1b \n\t" | |
93 "emms \n\t" | |
94 :: "r" (s16+512), "r" (f+256) | |
95 :"%esi", "memory" | |
96 ); | |
3909 | 97 return 2*256; |
98 } | |
99 | |
/*
 * a52_resample_3F_to_5_MMX: convert three planes of 256 floats each
 * (plane k = _f[256*k .. 256*k+255], k = 0..2) to int16 and write 256 frames of
 * five int16 words each to s16[0..1279].
 *
 * Every output frame is {plane0, plane2, 0, 0, plane1}.
 *
 * Conversion: magicF2W is subtracted as an integer from each raw float bit
 * pattern (psubd) and the results are packed to int16 with signed saturation
 * (packssdw). Zero words are produced either from the cleared register mm6 or
 * by loading the magic constant itself so that the subtraction yields 0.
 * NOTE(review): this presumably relies on the floats being biased to lie near
 * magicF2W - confirm against the caller.
 * NOTE(review): %esi/%edi are used as 32-bit index registers, so this presumably
 * targets 32-bit x86 only.
 *
 * Returns 5*256, the number of int16 words written.
 */
100 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ | |
101 int32_t * f = (int32_t *) _f; | |
3654 | 102 asm volatile(
103 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
104 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3654 | 105 "pxor %%mm6, %%mm6 \n\t" |
// mm5 = {magic constant in the low dword, 0 in the high dword}: used for registers
// whose high dword was zero-filled by movd and must stay zero after psubd.
106 "movq %%mm7, %%mm5 \n\t" | |
107 "punpckldq %%mm6, %%mm5 \n\t" | |
// Loop body: 4 samples per plane (16 input bytes) in, 4 frames (40 bytes) out.
108 "1: \n\t" | |
109 "movd (%1, %%esi), %%mm0 \n\t" | |
110 "punpckldq 2048(%1, %%esi), %%mm0\n\t" | |
111 "movd 1024(%1, %%esi), %%mm1 \n\t" | |
112 "punpckldq 4(%1, %%esi), %%mm1 \n\t" | |
113 "movd 2052(%1, %%esi), %%mm2 \n\t" | |
// mm3 starts as the magic constant so that its low dword becomes 0 after psubd.
114 "movq %%mm7, %%mm3 \n\t" | |
115 "punpckldq 1028(%1, %%esi), %%mm3\n\t" | |
116 "movd 8(%1, %%esi), %%mm4 \n\t" | |
117 "punpckldq 2056(%1, %%esi), %%mm4\n\t" | |
// edi = esi*5/2: output byte offset (10 output bytes for every 4 input bytes).
118 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
119 "sarl $1, %%edi \n\t" | |
120 "psubd %%mm7, %%mm0 \n\t" | |
121 "psubd %%mm7, %%mm1 \n\t" | |
122 "psubd %%mm5, %%mm2 \n\t" | |
123 "psubd %%mm7, %%mm3 \n\t" | |
124 "psubd %%mm7, %%mm4 \n\t" | |
125 "packssdw %%mm6, %%mm0 \n\t" | |
126 "packssdw %%mm2, %%mm1 \n\t" | |
127 "packssdw %%mm4, %%mm3 \n\t" | |
128 "movq %%mm0, (%0, %%edi) \n\t" | |
129 "movq %%mm1, 8(%0, %%edi) \n\t" | |
130 "movq %%mm3, 16(%0, %%edi) \n\t" | |
131 | |
// Second half of the iteration: remaining words of frames 2 and 3.
132 "movd 1032(%1, %%esi), %%mm1 \n\t" | |
133 "punpckldq 12(%1, %%esi), %%mm1\n\t" | |
134 "movd 2060(%1, %%esi), %%mm2 \n\t" | |
135 "movq %%mm7, %%mm3 \n\t" | |
136 "punpckldq 1036(%1, %%esi), %%mm3\n\t" | |
137 "pxor %%mm0, %%mm0 \n\t" | |
138 "psubd %%mm7, %%mm1 \n\t" | |
139 "psubd %%mm5, %%mm2 \n\t" | |
140 "psubd %%mm7, %%mm3 \n\t" | |
141 "packssdw %%mm1, %%mm0 \n\t" | |
142 "packssdw %%mm3, %%mm2 \n\t" | |
143 "movq %%mm0, 24(%0, %%edi) \n\t" | |
144 "movq %%mm2, 32(%0, %%edi) \n\t" | |
145 | |
146 "addl $16, %%esi \n\t" | |
147 " jnz 1b \n\t" | |
148 "emms \n\t" | |
149 :: "r" (s16+1280), "r" (f+256) | |
150 :"%esi", "%edi", "memory" | |
151 ); | |
3909 | 152 return 5*256; |
153 } | |
154 | |
/*
 * a52_resample_2F_2R_to_4_MMX: convert four planes of 256 floats each
 * (plane k = _f[256*k .. 256*k+255], k = 0..3) to int16 and interleave them into
 * s16[0..1023] as 256 frames of {plane0, plane1, plane2, plane3}.
 *
 * Conversion: magicF2W is subtracted as an integer from each raw float bit
 * pattern (psubd) and the results are packed to int16 with signed saturation
 * (packssdw).
 * NOTE(review): this presumably relies on the floats being biased to lie near
 * magicF2W - confirm against the caller.
 * NOTE(review): %esi is used as a 32-bit index register, so this presumably
 * targets 32-bit x86 only.
 *
 * Returns 4*256, the number of int16 words written.
 */
155 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ | |
156 int32_t * f = (int32_t *) _f; | |
3569 | 157 asm volatile(
// esi is a negative byte offset into each input plane (-1024 up to 0); the output
// is addressed with esi*2 because 16 input bytes per plane give 32 output bytes.
158 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
159 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3569 | 160 "1: \n\t" |
161 "movq (%1, %%esi), %%mm0 \n\t" | |
162 "movq 8(%1, %%esi), %%mm1 \n\t" | |
163 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
164 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
165 "psubd %%mm7, %%mm0 \n\t" | |
166 "psubd %%mm7, %%mm1 \n\t" | |
167 "psubd %%mm7, %%mm2 \n\t" | |
168 "psubd %%mm7, %%mm3 \n\t" | |
169 "packssdw %%mm1, %%mm0 \n\t" | |
170 "packssdw %%mm3, %%mm2 \n\t" | |
171 "movq 2048(%1, %%esi), %%mm3 \n\t" | |
172 "movq 2056(%1, %%esi), %%mm4 \n\t" | |
173 "movq 3072(%1, %%esi), %%mm5 \n\t" | |
174 "movq 3080(%1, %%esi), %%mm6 \n\t" | |
175 "psubd %%mm7, %%mm3 \n\t" | |
176 "psubd %%mm7, %%mm4 \n\t" | |
177 "psubd %%mm7, %%mm5 \n\t" | |
178 "psubd %%mm7, %%mm6 \n\t" | |
179 "packssdw %%mm4, %%mm3 \n\t" | |
180 "packssdw %%mm6, %%mm5 \n\t" | |
// mm0/mm2/mm3/mm5 hold four samples of planes 0/1/2/3. First interleave words of
// planes 0+1 and 2+3, then interleave the resulting dwords to form whole frames.
181 "movq %%mm0, %%mm1 \n\t" | |
182 "movq %%mm3, %%mm4 \n\t" | |
183 "punpcklwd %%mm2, %%mm0 \n\t" | |
184 "punpckhwd %%mm2, %%mm1 \n\t" | |
185 "punpcklwd %%mm5, %%mm3 \n\t" | |
186 "punpckhwd %%mm5, %%mm4 \n\t" | |
187 "movq %%mm0, %%mm2 \n\t" | |
188 "movq %%mm1, %%mm5 \n\t" | |
189 "punpckldq %%mm3, %%mm0 \n\t" | |
190 "punpckhdq %%mm3, %%mm2 \n\t" | |
191 "punpckldq %%mm4, %%mm1 \n\t" | |
192 "punpckhdq %%mm4, %%mm5 \n\t" | |
193 "movq %%mm0, (%0, %%esi,2) \n\t" | |
194 "movq %%mm2, 8(%0, %%esi,2) \n\t" | |
195 "movq %%mm1, 16(%0, %%esi,2) \n\t" | |
196 "movq %%mm5, 24(%0, %%esi,2) \n\t" | |
197 "addl $16, %%esi \n\t" | |
198 " jnz 1b \n\t" | |
199 "emms \n\t" | |
200 :: "r" (s16+1024), "r" (f+256) | |
201 :"%esi", "memory" | |
202 ); | |
3909 | 203 return 4*256; |
204 } | |
205 | |
/*
 * a52_resample_3F_2R_to_5_MMX: convert five planes of 256 floats each
 * (plane k = _f[256*k .. 256*k+255], k = 0..4) to int16 and write 256 frames of
 * five int16 words each to s16[0..1279].
 *
 * Every output frame is {plane0, plane2, plane3, plane4, plane1}.
 *
 * Conversion: magicF2W is subtracted as an integer from each raw float bit
 * pattern (psubd) and the results are packed to int16 with signed saturation
 * (packssdw). The loads gather the dwords already in output order, so no
 * shuffling is needed after packing.
 * NOTE(review): this presumably relies on the floats being biased to lie near
 * magicF2W - confirm against the caller.
 * NOTE(review): %esi/%edi are used as 32-bit index registers, so this presumably
 * targets 32-bit x86 only.
 *
 * Returns 5*256, the number of int16 words written.
 */
206 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ | |
207 int32_t * f = (int32_t *) _f; | |
3653 | 208 asm volatile(
209 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
210 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
// Loop body: 4 samples per plane (16 input bytes) in, 4 frames (40 bytes) out.
3653 | 211 "1: \n\t" |
212 "movd (%1, %%esi), %%mm0 \n\t" | |
213 "punpckldq 2048(%1, %%esi), %%mm0\n\t" | |
214 "movd 3072(%1, %%esi), %%mm1 \n\t" | |
215 "punpckldq 4096(%1, %%esi), %%mm1\n\t" | |
216 "movd 1024(%1, %%esi), %%mm2 \n\t" | |
217 "punpckldq 4(%1, %%esi), %%mm2 \n\t" | |
218 "movd 2052(%1, %%esi), %%mm3 \n\t" | |
219 "punpckldq 3076(%1, %%esi), %%mm3\n\t" | |
220 "movd 4100(%1, %%esi), %%mm4 \n\t" | |
221 "punpckldq 1028(%1, %%esi), %%mm4\n\t" | |
222 "movd 8(%1, %%esi), %%mm5 \n\t" | |
223 "punpckldq 2056(%1, %%esi), %%mm5\n\t" | |
// edi = esi*5/2: output byte offset (10 output bytes for every 4 input bytes).
224 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
225 "sarl $1, %%edi \n\t" | |
226 "psubd %%mm7, %%mm0 \n\t" | |
227 "psubd %%mm7, %%mm1 \n\t" | |
228 "psubd %%mm7, %%mm2 \n\t" | |
229 "psubd %%mm7, %%mm3 \n\t" | |
230 "psubd %%mm7, %%mm4 \n\t" | |
231 "psubd %%mm7, %%mm5 \n\t" | |
232 "packssdw %%mm1, %%mm0 \n\t" | |
233 "packssdw %%mm3, %%mm2 \n\t" | |
234 "packssdw %%mm5, %%mm4 \n\t" | |
235 "movq %%mm0, (%0, %%edi) \n\t" | |
236 "movq %%mm2, 8(%0, %%edi) \n\t" | |
237 "movq %%mm4, 16(%0, %%edi) \n\t" | |
238 | |
// Second half of the iteration: remaining words of frames 2 and 3.
239 "movd 3080(%1, %%esi), %%mm0 \n\t" | |
240 "punpckldq 4104(%1, %%esi), %%mm0\n\t" | |
241 "movd 1032(%1, %%esi), %%mm1 \n\t" | |
242 "punpckldq 12(%1, %%esi), %%mm1\n\t" | |
243 "movd 2060(%1, %%esi), %%mm2 \n\t" | |
244 "punpckldq 3084(%1, %%esi), %%mm2\n\t" | |
245 "movd 4108(%1, %%esi), %%mm3 \n\t" | |
246 "punpckldq 1036(%1, %%esi), %%mm3\n\t" | |
247 "psubd %%mm7, %%mm0 \n\t" | |
248 "psubd %%mm7, %%mm1 \n\t" | |
249 "psubd %%mm7, %%mm2 \n\t" | |
250 "psubd %%mm7, %%mm3 \n\t" | |
251 "packssdw %%mm1, %%mm0 \n\t" | |
252 "packssdw %%mm3, %%mm2 \n\t" | |
253 "movq %%mm0, 24(%0, %%edi) \n\t" | |
254 "movq %%mm2, 32(%0, %%edi) \n\t" | |
255 | |
256 "addl $16, %%esi \n\t" | |
257 " jnz 1b \n\t" | |
258 "emms \n\t" | |
259 :: "r" (s16+1280), "r" (f+256) | |
260 :"%esi", "%edi", "memory" | |
261 ); | |
3909 | 262 return 5*256; |
263 } | |
264 | |
/*
 * a52_resample_MONO_LFE_to_6_MMX: convert two planes of 256 floats each
 * (plane 0 = _f[0..255], plane 1 = _f[256..511]) to int16 and write 256 frames
 * of six int16 words each to s16[0..1535].
 *
 * Every output frame is {0, 0, 0, 0, plane1, plane0}.
 *
 * Conversion: magicF2W is subtracted as an integer from each raw float bit
 * pattern (psubd) and the results are packed to int16 with signed saturation
 * (packssdw).
 * NOTE(review): this presumably relies on the floats being biased to lie near
 * magicF2W - confirm against the caller.
 * NOTE(review): %esi/%edi are used as 32-bit index registers, so this presumably
 * targets 32-bit x86 only.
 *
 * Returns 6*256, the number of int16 words written.
 */
265 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
266 int32_t * f = (int32_t *) _f; | |
3569 | 267 asm volatile(
268 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
269 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3569 | 270 "pxor %%mm6, %%mm6 \n\t" |
// Loop body: 4 samples per plane (16 input bytes) in, 4 frames (48 bytes) out.
271 "1: \n\t" | |
272 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
273 "movq 1032(%1, %%esi), %%mm1 \n\t" | |
274 "movq (%1, %%esi), %%mm2 \n\t" | |
275 "movq 8(%1, %%esi), %%mm3 \n\t" | |
276 "psubd %%mm7, %%mm0 \n\t" | |
277 "psubd %%mm7, %%mm1 \n\t" | |
278 "psubd %%mm7, %%mm2 \n\t" | |
279 "psubd %%mm7, %%mm3 \n\t" | |
280 "packssdw %%mm1, %%mm0 \n\t" | |
281 "packssdw %%mm3, %%mm2 \n\t" | |
// Interleave so that each dword is a {plane1, plane0} pair for one frame.
282 "movq %%mm0, %%mm1 \n\t" | |
283 "punpcklwd %%mm2, %%mm0 \n\t" | |
284 "punpckhwd %%mm2, %%mm1 \n\t" | |
// edi = esi*3: output byte offset (12 output bytes for every 4 input bytes).
285 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
// Per frame: 8 zero bytes from mm6, then one {plane1, plane0} dword.
286 "movq %%mm6, (%0, %%edi) \n\t" | |
287 "movd %%mm0, 8(%0, %%edi) \n\t" | |
288 "punpckhdq %%mm0, %%mm0 \n\t" | |
289 "movq %%mm6, 12(%0, %%edi) \n\t" | |
290 "movd %%mm0, 20(%0, %%edi) \n\t" | |
291 "movq %%mm6, 24(%0, %%edi) \n\t" | |
292 "movd %%mm1, 32(%0, %%edi) \n\t" | |
293 "punpckhdq %%mm1, %%mm1 \n\t" | |
294 "movq %%mm6, 36(%0, %%edi) \n\t" | |
295 "movd %%mm1, 44(%0, %%edi) \n\t" | |
296 "addl $16, %%esi \n\t" | |
297 " jnz 1b \n\t" | |
298 "emms \n\t" | |
299 :: "r" (s16+1536), "r" (f+256) | |
300 :"%esi", "%edi", "memory" | |
301 ); | |
3909 | 302 return 6*256; |
303 } | |
304 | |
/*
 * a52_resample_STEREO_LFE_to_6_MMX: convert three planes of 256 floats each
 * (plane k = _f[256*k .. 256*k+255], k = 0..2) to int16 and write 256 frames of
 * six int16 words each to s16[0..1535].
 *
 * Every output frame is {plane1, plane2, 0, 0, 0, plane0}.
 *
 * Conversion: magicF2W is subtracted as an integer from each raw float bit
 * pattern (psubd) and the results are packed to int16 with signed saturation
 * (packssdw). The register comments inside the asm list the four words from the
 * most significant to the least significant; lower-case letters are the first
 * sample of the pair, upper-case the second.
 * NOTE(review): this presumably relies on the floats being biased to lie near
 * magicF2W - confirm against the caller.
 * NOTE(review): %esi/%edi are used as 32-bit index registers, so this presumably
 * targets 32-bit x86 only.
 *
 * Returns 6*256, the number of int16 words written.
 */
305 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
306 int32_t * f = (int32_t *) _f; | |
3576 | 307 asm volatile(
308 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
309 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3576 | 310 "pxor %%mm6, %%mm6 \n\t" |
// Loop body: 2 samples per plane (8 input bytes) in, 2 frames (24 bytes) out.
311 "1: \n\t" | |
312 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
313 "movq 2048(%1, %%esi), %%mm1 \n\t" | |
314 "movq (%1, %%esi), %%mm5 \n\t" | |
315 "psubd %%mm7, %%mm0 \n\t" | |
316 "psubd %%mm7, %%mm1 \n\t" | |
317 "psubd %%mm7, %%mm5 \n\t" | |
// edi = esi*3: output byte offset (12 output bytes for every 4 input bytes).
318 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
319 | |
320 "pxor %%mm4, %%mm4 \n\t" | |
321 "packssdw %%mm5, %%mm0 \n\t" // FfAa | |
322 "packssdw %%mm4, %%mm1 \n\t" // 00Bb | |
323 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0 | |
324 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
325 "movq %%mm0, %%mm1 \n\t" // BAba | |
// mm3 is read here without having been initialised; its old low dword (XX) is
// discarded by the punpckhdq two instructions below.
326 "punpckldq %%mm4, %%mm3 \n\t" // f0XX | |
327 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
328 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 | |
329 | |
330 "movq %%mm0, (%0, %%edi) \n\t" // 00ba | |
331 "punpckhdq %%mm4, %%mm0 \n\t" // F000 | |
332 "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0 | |
333 "movq %%mm0, 16(%0, %%edi) \n\t" // F000 | |
334 "addl $8, %%esi \n\t" | |
335 " jnz 1b \n\t" | |
336 "emms \n\t" | |
337 :: "r" (s16+1536), "r" (f+256) | |
338 :"%esi", "%edi", "memory" | |
339 ); | |
3909 | 340 return 6*256; |
341 } | |
342 | |
/*
 * a52_resample_3F_LFE_to_6_MMX: convert four planes of 256 floats each
 * (plane k = _f[256*k .. 256*k+255], k = 0..3) to int16 and write 256 frames of
 * six int16 words each to s16[0..1535].
 *
 * Every output frame is {plane1, plane3, 0, 0, plane2, plane0}.
 *
 * Conversion: magicF2W is subtracted as an integer from each raw float bit
 * pattern (psubd) and the results are packed to int16 with signed saturation
 * (packssdw). The register comments inside the asm list the four words from the
 * most significant to the least significant; lower-case letters are the first
 * sample of the pair, upper-case the second.
 * NOTE(review): this presumably relies on the floats being biased to lie near
 * magicF2W - confirm against the caller.
 * NOTE(review): %esi/%edi are used as 32-bit index registers, so this presumably
 * targets 32-bit x86 only.
 *
 * Returns 6*256, the number of int16 words written.
 */
343 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
344 int32_t * f = (int32_t *) _f; | |
3578 | 345 asm volatile(
346 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
347 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3578 | 348 "pxor %%mm6, %%mm6 \n\t" |
// Loop body: 2 samples per plane (8 input bytes) in, 2 frames (24 bytes) out.
349 "1: \n\t" | |
350 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
351 "movq 3072(%1, %%esi), %%mm1 \n\t" | |
352 "movq 2048(%1, %%esi), %%mm4 \n\t" | |
353 "movq (%1, %%esi), %%mm5 \n\t" | |
354 "psubd %%mm7, %%mm0 \n\t" | |
355 "psubd %%mm7, %%mm1 \n\t" | |
356 "psubd %%mm7, %%mm4 \n\t" | |
357 "psubd %%mm7, %%mm5 \n\t" | |
// edi = esi*3: output byte offset (12 output bytes for every 4 input bytes).
358 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
359 | |
360 "packssdw %%mm4, %%mm0 \n\t" // EeAa | |
361 "packssdw %%mm5, %%mm1 \n\t" // FfBb | |
362 "movq %%mm0, %%mm2 \n\t" // EeAa | |
363 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
364 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe | |
365 "movq %%mm0, %%mm1 \n\t" // BAba | |
366 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
367 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
368 | |
369 "movq %%mm0, (%0, %%edi) \n\t" | |
370 "punpckhdq %%mm2, %%mm0 \n\t" // FE00 | |
371 "punpckldq %%mm1, %%mm2 \n\t" // BAfe | |
372 "movq %%mm2, 8(%0, %%edi) \n\t" | |
373 "movq %%mm0, 16(%0, %%edi) \n\t" | |
374 "addl $8, %%esi \n\t" | |
375 " jnz 1b \n\t" | |
376 "emms \n\t" | |
377 :: "r" (s16+1536), "r" (f+256) | |
378 :"%esi", "%edi", "memory" | |
379 ); | |
3909 | 380 return 6*256; |
381 } | |
382 | |
/*
 * a52_resample_2F_2R_LFE_to_6_MMX: convert five planes of 256 floats each
 * (plane k = _f[256*k .. 256*k+255], k = 0..4) to int16 and write 256 frames of
 * six int16 words each to s16[0..1535].
 *
 * Every output frame is {plane1, plane2, plane3, plane4, 0, plane0}.
 *
 * Conversion: magicF2W is subtracted as an integer from each raw float bit
 * pattern (psubd) and the results are packed to int16 with signed saturation
 * (packssdw). The register comments inside the asm list the four words from the
 * most significant to the least significant; lower-case letters are the first
 * sample of the pair, upper-case the second.
 * NOTE(review): this presumably relies on the floats being biased to lie near
 * magicF2W - confirm against the caller.
 * NOTE(review): %esi/%edi are used as 32-bit index registers, so this presumably
 * targets 32-bit x86 only.
 *
 * Returns 6*256, the number of int16 words written.
 */
383 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
384 int32_t * f = (int32_t *) _f; | |
3577 | 385 asm volatile(
386 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
387 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3577 | 388 // "pxor %%mm6, %%mm6 \n\t" |
// Loop body: 2 samples per plane (8 input bytes) in, 2 frames (24 bytes) out.
389 "1: \n\t" | |
390 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
391 "movq 2048(%1, %%esi), %%mm1 \n\t" | |
392 "movq 3072(%1, %%esi), %%mm2 \n\t" | |
393 "movq 4096(%1, %%esi), %%mm3 \n\t" | |
394 "movq (%1, %%esi), %%mm5 \n\t" | |
395 "psubd %%mm7, %%mm0 \n\t" | |
396 "psubd %%mm7, %%mm1 \n\t" | |
397 "psubd %%mm7, %%mm2 \n\t" | |
398 "psubd %%mm7, %%mm3 \n\t" | |
399 "psubd %%mm7, %%mm5 \n\t" | |
// edi = esi*3: output byte offset (12 output bytes for every 4 input bytes).
400 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
401 | |
402 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
403 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
404 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
405 "movq %%mm0, %%mm2 \n\t" // CcAa | |
406 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
407 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
408 "pxor %%mm4, %%mm4 \n\t" // 0000 | |
409 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0 | |
410 "movq %%mm0, %%mm1 \n\t" // BAba | |
411 "movq %%mm4, %%mm3 \n\t" // F0f0 | |
412 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
413 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
414 "punpckldq %%mm1, %%mm4 \n\t" // BAf0 | |
415 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC | |
416 | |
417 "movq %%mm0, (%0, %%edi) \n\t" | |
418 "movq %%mm4, 8(%0, %%edi) \n\t" | |
419 "movq %%mm2, 16(%0, %%edi) \n\t" | |
420 "addl $8, %%esi \n\t" | |
421 " jnz 1b \n\t" | |
422 "emms \n\t" | |
423 :: "r" (s16+1536), "r" (f+256) | |
424 :"%esi", "%edi", "memory" | |
425 ); | |
3909 | 426 return 6*256; |
427 } | |
428 | |
/*
 * a52_resample_3F_2R_LFE_to_6_MMX: convert six planes of 256 floats each
 * (plane k = _f[256*k .. 256*k+255], k = 0..5) to int16 and write 256 frames of
 * six int16 words each to s16[0..1535].
 *
 * Every output frame is {plane1, plane3, plane4, plane5, plane2, plane0}.
 *
 * Conversion: magicF2W is subtracted as an integer from each raw float bit
 * pattern (psubd) and the results are packed to int16 with signed saturation
 * (packssdw). The register comments inside the asm list the four words from the
 * most significant to the least significant; lower-case letters are the first
 * sample of the pair, upper-case the second.
 * NOTE(review): this presumably relies on the floats being biased to lie near
 * magicF2W - confirm against the caller.
 * NOTE(review): %esi/%edi are used as 32-bit index registers, so this presumably
 * targets 32-bit x86 only.
 *
 * Returns 6*256, the number of int16 words written.
 */
429 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
430 int32_t * f = (int32_t *) _f; | |
3575 | 431 asm volatile(
432 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
433 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3575 | 434 // "pxor %%mm6, %%mm6 \n\t" |
// Loop body: 2 samples per plane (8 input bytes) in, 2 frames (24 bytes) out.
435 "1: \n\t" | |
436 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
437 "movq 3072(%1, %%esi), %%mm1 \n\t" | |
438 "movq 4096(%1, %%esi), %%mm2 \n\t" | |
439 "movq 5120(%1, %%esi), %%mm3 \n\t" | |
440 "movq 2048(%1, %%esi), %%mm4 \n\t" | |
441 "movq (%1, %%esi), %%mm5 \n\t" | |
442 "psubd %%mm7, %%mm0 \n\t" | |
443 "psubd %%mm7, %%mm1 \n\t" | |
444 "psubd %%mm7, %%mm2 \n\t" | |
445 "psubd %%mm7, %%mm3 \n\t" | |
446 "psubd %%mm7, %%mm4 \n\t" | |
447 "psubd %%mm7, %%mm5 \n\t" | |
// edi = esi*3: output byte offset (12 output bytes for every 4 input bytes).
448 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
449 | |
450 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
451 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
452 "packssdw %%mm4, %%mm4 \n\t" // EeEe | |
453 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
454 "movq %%mm0, %%mm2 \n\t" // CcAa | |
455 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
456 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
457 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe | |
458 "movq %%mm0, %%mm1 \n\t" // BAba | |
459 "movq %%mm4, %%mm3 \n\t" // FEfe | |
460 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
461 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
462 "punpckldq %%mm1, %%mm4 \n\t" // BAfe | |
463 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC | |
464 | |
465 "movq %%mm0, (%0, %%edi) \n\t" | |
466 "movq %%mm4, 8(%0, %%edi) \n\t" | |
467 "movq %%mm2, 16(%0, %%edi) \n\t" | |
468 "addl $8, %%esi \n\t" | |
469 " jnz 1b \n\t" | |
470 "emms \n\t" | |
471 :: "r" (s16+1536), "r" (f+256) | |
472 :"%esi", "%edi", "memory" | |
473 ); | |
3909 | 474 return 6*256; |
475 } | |
476 | |
477 | |
478 static void* a52_resample_MMX(int flags, int ch){ | |
479 switch (flags) { | |
480 case A52_MONO: | |
481 if(ch==5) return a52_resample_MONO_to_5_MMX; | |
482 break; | |
483 case A52_CHANNEL: | |
484 case A52_STEREO: | |
485 case A52_DOLBY: | |
486 if(ch==2) return a52_resample_STEREO_to_2_MMX; | |
487 break; | |
488 case A52_3F: | |
489 if(ch==5) return a52_resample_3F_to_5_MMX; | |
490 break; | |
491 case A52_2F2R: | |
492 if(ch==4) return a52_resample_2F_2R_to_4_MMX; | |
493 break; | |
494 case A52_3F2R: | |
495 if(ch==5) return a52_resample_3F_2R_to_5_MMX; | |
496 break; | |
497 case A52_MONO | A52_LFE: | |
498 if(ch==6) return a52_resample_MONO_LFE_to_6_MMX; | |
499 break; | |
500 case A52_CHANNEL | A52_LFE: | |
501 case A52_STEREO | A52_LFE: | |
502 case A52_DOLBY | A52_LFE: | |
503 if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX; | |
504 break; | |
505 case A52_3F | A52_LFE: | |
506 if(ch==6) return a52_resample_3F_LFE_to_6_MMX; | |
507 break; | |
508 case A52_2F2R | A52_LFE: | |
509 if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX; | |
510 break; | |
511 case A52_3F2R | A52_LFE: | |
512 if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX; | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
513 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
514 } |
3909 | 515 return NULL; |
3626 | 516 } |
517 | |
3909 | 518 |