Mercurial > mplayer.hg
annotate liba52/resample_mmx.c @ 25205:f291ec948fca
Get rid of annoying, space-wasting sizeof(uint32_t)
author | reimar |
---|---|
date | Sat, 01 Dec 2007 15:01:55 +0000 |
parents | d6219ce521e9 |
children | a180e69cc18c |
rev | line source |
---|---|
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
1 |
3569 | 2 // MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL) |
3 | |
4 /* optimization TODO / NOTES | |
5 movntq is slightly faster (0.5% with the current test.c benchmark) | |
6 (but that's just test.c so that needs to be tested in reality) | |
7 and it would mean (C / MMX2 / MMX / 3DNOW) versions | |
8 */ | |
9 | |
16173 | 10 #include "a52_internal.h" |
11 | |
12 | |
12303
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
/*
 * 64-bit constants referenced by name (through MANGLE) from the inline asm
 * in this file.
 * magicF2W holds 0x43c00000 -- the IEEE-754 bit pattern of 384.0f -- in both
 * 32-bit halves; the conversion loops subtract it from every input value.
 * wm1010 keeps 16-bit words 1 and 3, wm0101 keeps words 0 and 2, and wm1100
 * keeps the upper 32 bits; they are used by a52_resample_MONO_to_5_MMX.
 * NOTE(review): attribute_used presumably stops the compiler from discarding
 * these, since only asm strings reference them -- confirm.
 */
13 static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
14 static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
15 static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
16 static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL; |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
17 |
/*
 * Converts the 256 32-bit values starting at _f to int16_t and writes 256
 * output frames of 5 int16_t each to s16: slots 0-3 of every frame are zero
 * and slot 4 holds the converted sample.
 * Conversion: magicF2W (0x43c00000, the bit pattern of 384.0f) is subtracted
 * from each value as an integer, then the result is packed to 16 bits with
 * signed saturation.
 * The index register counts from -512 up to 0 in steps of 8 (4 samples per
 * iteration); it is scaled by 2 to address the input and by 5 to address the
 * output, which is why the base pointers are f+256 and s16+1280.
 * Returns 5*256, the number of int16_t values written.
 * NOTE(review): presumably _f holds decoder output biased by 384.0 so that the
 * low mantissa bits are the PCM value -- confirm against the caller.
 */
3909 | 18 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
19 int32_t * f = (int32_t *) _f; |
3574 | 20 asm volatile( |
16173 | 21 "mov $-512, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
22 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
23 "movq "MANGLE(wm1100)", %%mm3 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
24 "movq "MANGLE(wm0101)", %%mm4 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
25 "movq "MANGLE(wm1010)", %%mm5 \n\t" |
3574 | 26 "pxor %%mm6, %%mm6 \n\t" |
27 "1: \n\t" | |
16173 | 28 "movq (%1, %%"REG_S", 2), %%mm0 \n\t" |
29 "movq 8(%1, %%"REG_S", 2), %%mm1\n\t" | |
30 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" | |
3574 | 31 "psubd %%mm7, %%mm0 \n\t" |
32 "psubd %%mm7, %%mm1 \n\t" | |
33 "packssdw %%mm1, %%mm0 \n\t" | |
34 "movq %%mm0, %%mm1 \n\t" | |
35 "pand %%mm4, %%mm0 \n\t" | |
36 "pand %%mm5, %%mm1 \n\t" | |
16173 | 37 "movq %%mm6, (%0, %%"REG_D") \n\t" // 0 0 0 0 |
38 "movd %%mm0, 8(%0, %%"REG_D") \n\t" // A 0 | |
3574 | 39 "pand %%mm3, %%mm0 \n\t" |
16173 | 40 "movd %%mm6, 12(%0, %%"REG_D") \n\t" // 0 0 |
41 "movd %%mm1, 16(%0, %%"REG_D") \n\t" // 0 B | |
3574 | 42 "pand %%mm3, %%mm1 \n\t" |
16173 | 43 "movd %%mm6, 20(%0, %%"REG_D") \n\t" // 0 0 |
44 "movq %%mm0, 24(%0, %%"REG_D") \n\t" // 0 0 C 0 | |
45 "movq %%mm1, 32(%0, %%"REG_D") \n\t" // 0 0 0 D | |
46 "add $8, %%"REG_S" \n\t" | |
3574 | 47 " jnz 1b \n\t" |
48 "emms \n\t" | |
49 :: "r" (s16+1280), "r" (f+256) | |
16173 | 50 :"%"REG_S, "%"REG_D, "memory" |
3574 | 51 ); |
3909 | 52 return 5*256; |
53 } | |
54 | |
/*
 * Converts two consecutive blocks of 256 32-bit values at _f (byte offsets 0
 * and 1024) to int16_t and interleaves them into 256 two-slot frames at s16:
 * slot 0 comes from the first block, slot 1 from the second.
 * Each value has magicF2W subtracted as an integer and is then packed to
 * 16 bits with signed saturation. The index register counts from -1024 up to
 * 0 in steps of 16 bytes (4 samples per block per iteration).
 * The active code is the second asm statement; the first one (cvtps2pi based)
 * is commented out.
 * Returns 2*256, the number of int16_t values written.
 */
55 static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ | |
56 int32_t * f = (int32_t *) _f; | |
3567 | 57 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it |
58 #ifdef HAVE_SSE | |
59 asm volatile( | |
16173 | 60 "mov $-1024, %%"REG_S" \n\t" |
3567 | 61 "1: \n\t" |
16173 | 62 "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t" |
63 "cvtps2pi 1024(%1, %%"REG_S"), %%mm2\n\t" | |
3567 | 64 "movq %%mm0, %%mm1 \n\t" |
65 "punpcklwd %%mm2, %%mm0 \n\t" | |
66 "punpckhwd %%mm2, %%mm1 \n\t" | |
16173 | 67 "movq %%mm0, (%0, %%"REG_S") \n\t" |
68 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
69 "add $16, %%"REG_S" \n\t" | |
3567 | 70 " jnz 1b \n\t" |
71 "emms \n\t" | |
72 :: "r" (s16+512), "r" (f+256) | |
16173 | 73 :"%"REG_S, "memory" |
3567 | 74 );*/ |
75 asm volatile( | |
16173 | 76 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
77 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3567 | 78 "1: \n\t" |
16173 | 79 "movq (%1, %%"REG_S"), %%mm0 \n\t" |
80 "movq 8(%1, %%"REG_S"), %%mm1 \n\t" | |
81 "movq 1024(%1, %%"REG_S"), %%mm2\n\t" | |
82 "movq 1032(%1, %%"REG_S"), %%mm3\n\t" | |
3567 | 83 "psubd %%mm7, %%mm0 \n\t" |
84 "psubd %%mm7, %%mm1 \n\t" | |
85 "psubd %%mm7, %%mm2 \n\t" | |
86 "psubd %%mm7, %%mm3 \n\t" | |
87 "packssdw %%mm1, %%mm0 \n\t" | |
88 "packssdw %%mm3, %%mm2 \n\t" | |
89 "movq %%mm0, %%mm1 \n\t" | |
90 "punpcklwd %%mm2, %%mm0 \n\t" | |
91 "punpckhwd %%mm2, %%mm1 \n\t" | |
16173 | 92 "movq %%mm0, (%0, %%"REG_S") \n\t" |
93 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
94 "add $16, %%"REG_S" \n\t" | |
3567 | 95 " jnz 1b \n\t" |
96 "emms \n\t" | |
97 :: "r" (s16+512), "r" (f+256) | |
16173 | 98 :"%"REG_S, "memory" |
3567 | 99 ); |
3909 | 100 return 2*256; |
101 } | |
102 | |
/*
 * Converts three consecutive blocks of 256 32-bit values at _f (byte offsets
 * 0, 1024 and 2048) to int16_t and writes 256 frames of 5 int16_t to s16.
 * Frame layout: slot 0 = block at offset 0, slot 1 = block at offset 2048,
 * slots 2 and 3 = zero, slot 4 = block at offset 1024.
 * Each value has magicF2W subtracted as an integer and is packed to 16 bits
 * with signed saturation. Zero slots are produced by pairing a sample with
 * either a zero dword or a copy of the magic value that cancels to zero in
 * the subtraction (mm5 holds magic in its low half and 0 in its high half).
 * The index register counts from -1024 to 0 in steps of 16 bytes (4 frames
 * per iteration); the output offset is index*5/2.
 * Returns 5*256, the number of int16_t values written.
 * NOTE(review): presumably the blocks are left/center/right in liba52 order,
 * giving L R 0 0 C output -- confirm against the caller.
 */
103 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ | |
104 int32_t * f = (int32_t *) _f; | |
3654 | 105 asm volatile( |
16173 | 106 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
107 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3654 | 108 "pxor %%mm6, %%mm6 \n\t" |
109 "movq %%mm7, %%mm5 \n\t" | |
110 "punpckldq %%mm6, %%mm5 \n\t" | |
111 "1: \n\t" | |
16173 | 112 "movd (%1, %%"REG_S"), %%mm0 \n\t" |
113 "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" | |
114 "movd 1024(%1, %%"REG_S"), %%mm1\n\t" | |
115 "punpckldq 4(%1, %%"REG_S"), %%mm1\n\t" | |
116 "movd 2052(%1, %%"REG_S"), %%mm2\n\t" | |
3654 | 117 "movq %%mm7, %%mm3 \n\t" |
16173 | 118 "punpckldq 1028(%1, %%"REG_S"), %%mm3\n\t" |
119 "movd 8(%1, %%"REG_S"), %%mm4 \n\t" | |
120 "punpckldq 2056(%1, %%"REG_S"), %%mm4\n\t" | |
121 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" | |
122 "sar $1, %%"REG_D" \n\t" | |
3654 | 123 "psubd %%mm7, %%mm0 \n\t" |
124 "psubd %%mm7, %%mm1 \n\t" | |
125 "psubd %%mm5, %%mm2 \n\t" | |
126 "psubd %%mm7, %%mm3 \n\t" | |
127 "psubd %%mm7, %%mm4 \n\t" | |
128 "packssdw %%mm6, %%mm0 \n\t" | |
129 "packssdw %%mm2, %%mm1 \n\t" | |
130 "packssdw %%mm4, %%mm3 \n\t" | |
16173 | 131 "movq %%mm0, (%0, %%"REG_D") \n\t" |
132 "movq %%mm1, 8(%0, %%"REG_D") \n\t" | |
133 "movq %%mm3, 16(%0, %%"REG_D") \n\t" | |
134 "movd 1032(%1, %%"REG_S"), %%mm1\n\t" | |
135 "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" | |
136 "movd 2060(%1, %%"REG_S"), %%mm2\n\t" | |
3654 | 137 "movq %%mm7, %%mm3 \n\t" |
16173 | 138 "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" |
3654 | 139 "pxor %%mm0, %%mm0 \n\t" |
140 "psubd %%mm7, %%mm1 \n\t" | |
141 "psubd %%mm5, %%mm2 \n\t" | |
142 "psubd %%mm7, %%mm3 \n\t" | |
143 "packssdw %%mm1, %%mm0 \n\t" | |
144 "packssdw %%mm3, %%mm2 \n\t" | |
16173 | 145 "movq %%mm0, 24(%0, %%"REG_D") \n\t" |
146 "movq %%mm2, 32(%0, %%"REG_D") \n\t" | |
3654 | 147 |
16173 | 148 "add $16, %%"REG_S" \n\t" |
3654 | 149 " jnz 1b \n\t" |
150 "emms \n\t" | |
151 :: "r" (s16+1280), "r" (f+256) | |
16173 | 152 :"%"REG_S, "%"REG_D, "memory" |
3654 | 153 ); |
3909 | 154 return 5*256; |
155 } | |
156 | |
/*
 * Converts four consecutive blocks of 256 32-bit values at _f (byte offsets
 * 0, 1024, 2048 and 3072) to int16_t and interleaves them into 256 frames of
 * 4 int16_t at s16; slot k of each frame comes from the block at offset
 * 1024*k, i.e. the block order is preserved.
 * Each value has magicF2W subtracted as an integer and is packed to 16 bits
 * with signed saturation; word and dword unpacks then transpose four samples
 * of each block into four complete frames per iteration.
 * The index register counts from -1024 to 0 in steps of 16 bytes and is
 * scaled by 2 to address the output.
 * Returns 4*256, the number of int16_t values written.
 */
157 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ | |
158 int32_t * f = (int32_t *) _f; | |
3569 | 159 asm volatile( |
16173 | 160 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
161 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3569 | 162 "1: \n\t" |
16173 | 163 "movq (%1, %%"REG_S"), %%mm0 \n\t" |
164 "movq 8(%1, %%"REG_S"), %%mm1 \n\t" | |
165 "movq 1024(%1, %%"REG_S"), %%mm2\n\t" | |
166 "movq 1032(%1, %%"REG_S"), %%mm3\n\t" | |
3569 | 167 "psubd %%mm7, %%mm0 \n\t" |
168 "psubd %%mm7, %%mm1 \n\t" | |
169 "psubd %%mm7, %%mm2 \n\t" | |
170 "psubd %%mm7, %%mm3 \n\t" | |
171 "packssdw %%mm1, %%mm0 \n\t" | |
172 "packssdw %%mm3, %%mm2 \n\t" | |
16173 | 173 "movq 2048(%1, %%"REG_S"), %%mm3\n\t" |
174 "movq 2056(%1, %%"REG_S"), %%mm4\n\t" | |
175 "movq 3072(%1, %%"REG_S"), %%mm5\n\t" | |
176 "movq 3080(%1, %%"REG_S"), %%mm6\n\t" | |
3569 | 177 "psubd %%mm7, %%mm3 \n\t" |
178 "psubd %%mm7, %%mm4 \n\t" | |
179 "psubd %%mm7, %%mm5 \n\t" | |
180 "psubd %%mm7, %%mm6 \n\t" | |
181 "packssdw %%mm4, %%mm3 \n\t" | |
182 "packssdw %%mm6, %%mm5 \n\t" | |
183 "movq %%mm0, %%mm1 \n\t" | |
184 "movq %%mm3, %%mm4 \n\t" | |
185 "punpcklwd %%mm2, %%mm0 \n\t" | |
186 "punpckhwd %%mm2, %%mm1 \n\t" | |
187 "punpcklwd %%mm5, %%mm3 \n\t" | |
188 "punpckhwd %%mm5, %%mm4 \n\t" | |
189 "movq %%mm0, %%mm2 \n\t" | |
190 "movq %%mm1, %%mm5 \n\t" | |
191 "punpckldq %%mm3, %%mm0 \n\t" | |
192 "punpckhdq %%mm3, %%mm2 \n\t" | |
193 "punpckldq %%mm4, %%mm1 \n\t" | |
194 "punpckhdq %%mm4, %%mm5 \n\t" | |
16173 | 195 "movq %%mm0, (%0, %%"REG_S",2) \n\t" |
196 "movq %%mm2, 8(%0, %%"REG_S",2) \n\t" | |
197 "movq %%mm1, 16(%0, %%"REG_S",2)\n\t" | |
198 "movq %%mm5, 24(%0, %%"REG_S",2)\n\t" | |
199 "add $16, %%"REG_S" \n\t" | |
3569 | 200 " jnz 1b \n\t" |
201 "emms \n\t" | |
202 :: "r" (s16+1024), "r" (f+256) | |
16173 | 203 :"%"REG_S, "memory" |
3569 | 204 ); |
3909 | 205 return 4*256; |
206 } | |
207 | |
/*
 * Converts five consecutive blocks of 256 32-bit values at _f (byte offsets
 * 0, 1024, 2048, 3072 and 4096) to int16_t and writes 256 frames of
 * 5 int16_t to s16.
 * Frame layout by source block offset: slot 0 = 0, slot 1 = 2048,
 * slot 2 = 3072, slot 3 = 4096, slot 4 = 1024.
 * Samples are gathered two at a time with movd/punpckldq in output order,
 * have magicF2W subtracted as an integer, and are packed to 16 bits with
 * signed saturation. The index register counts from -1024 to 0 in steps of
 * 16 bytes (4 frames per iteration); the output offset is index*5/2.
 * Returns 5*256, the number of int16_t values written.
 * NOTE(review): presumably this moves the second input channel (center in
 * liba52 order) to the last output slot -- confirm against the caller.
 */
208 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ | |
209 int32_t * f = (int32_t *) _f; | |
3653 | 210 asm volatile( |
16173 | 211 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
212 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3653 | 213 "1: \n\t" |
16173 | 214 "movd (%1, %%"REG_S"), %%mm0 \n\t" |
215 "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" | |
216 "movd 3072(%1, %%"REG_S"), %%mm1\n\t" | |
217 "punpckldq 4096(%1, %%"REG_S"), %%mm1\n\t" | |
218 "movd 1024(%1, %%"REG_S"), %%mm2\n\t" | |
219 "punpckldq 4(%1, %%"REG_S"), %%mm2\n\t" | |
220 "movd 2052(%1, %%"REG_S"), %%mm3\n\t" | |
221 "punpckldq 3076(%1, %%"REG_S"), %%mm3\n\t" | |
222 "movd 4100(%1, %%"REG_S"), %%mm4\n\t" | |
223 "punpckldq 1028(%1, %%"REG_S"), %%mm4\n\t" | |
224 "movd 8(%1, %%"REG_S"), %%mm5 \n\t" | |
225 "punpckldq 2056(%1, %%"REG_S"), %%mm5\n\t" | |
226 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" | |
227 "sar $1, %%"REG_D" \n\t" | |
3653 | 228 "psubd %%mm7, %%mm0 \n\t" |
229 "psubd %%mm7, %%mm1 \n\t" | |
230 "psubd %%mm7, %%mm2 \n\t" | |
231 "psubd %%mm7, %%mm3 \n\t" | |
232 "psubd %%mm7, %%mm4 \n\t" | |
233 "psubd %%mm7, %%mm5 \n\t" | |
234 "packssdw %%mm1, %%mm0 \n\t" | |
235 "packssdw %%mm3, %%mm2 \n\t" | |
236 "packssdw %%mm5, %%mm4 \n\t" | |
16173 | 237 "movq %%mm0, (%0, %%"REG_D") \n\t" |
238 "movq %%mm2, 8(%0, %%"REG_D") \n\t" | |
239 "movq %%mm4, 16(%0, %%"REG_D") \n\t" | |
3653 | 240 |
16173 | 241 "movd 3080(%1, %%"REG_S"), %%mm0\n\t" |
242 "punpckldq 4104(%1, %%"REG_S"), %%mm0\n\t" | |
243 "movd 1032(%1, %%"REG_S"), %%mm1\n\t" | |
244 "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" | |
245 "movd 2060(%1, %%"REG_S"), %%mm2\n\t" | |
246 "punpckldq 3084(%1, %%"REG_S"), %%mm2\n\t" | |
247 "movd 4108(%1, %%"REG_S"), %%mm3\n\t" | |
248 "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" | |
3653 | 249 "psubd %%mm7, %%mm0 \n\t" |
250 "psubd %%mm7, %%mm1 \n\t" | |
251 "psubd %%mm7, %%mm2 \n\t" | |
252 "psubd %%mm7, %%mm3 \n\t" | |
253 "packssdw %%mm1, %%mm0 \n\t" | |
254 "packssdw %%mm3, %%mm2 \n\t" | |
16173 | 255 "movq %%mm0, 24(%0, %%"REG_D") \n\t" |
256 "movq %%mm2, 32(%0, %%"REG_D") \n\t" | |
3653 | 257 |
16173 | 258 "add $16, %%"REG_S" \n\t" |
3653 | 259 " jnz 1b \n\t" |
260 "emms \n\t" | |
261 :: "r" (s16+1280), "r" (f+256) | |
16173 | 262 :"%"REG_S, "%"REG_D, "memory" |
3653 | 263 ); |
3909 | 264 return 5*256; |
265 } | |
266 | |
/*
 * Converts two consecutive blocks of 256 32-bit values at _f (byte offsets 0
 * and 1024) to int16_t and writes 256 frames of 6 int16_t to s16.
 * Frame layout: slots 0-3 = zero, slot 4 = block at offset 1024,
 * slot 5 = block at offset 0.
 * Each value has magicF2W subtracted as an integer and is packed to 16 bits
 * with signed saturation; the two blocks are word-interleaved and each pair
 * is stored after 8 zero bytes. The index register counts from -1024 to 0 in
 * steps of 16 bytes (4 frames per iteration) and is scaled by 3 to address
 * the output.
 * Returns 6*256, the number of int16_t values written.
 * NOTE(review): presumably the block at offset 0 is the LFE channel and the
 * block at offset 1024 is the mono channel -- confirm against the caller.
 */
267 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
268 int32_t * f = (int32_t *) _f; | |
3569 | 269 asm volatile( |
16173 | 270 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
271 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3569 | 272 "pxor %%mm6, %%mm6 \n\t" |
273 "1: \n\t" | |
16173 | 274 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
275 "movq 1032(%1, %%"REG_S"), %%mm1\n\t" | |
276 "movq (%1, %%"REG_S"), %%mm2 \n\t" | |
277 "movq 8(%1, %%"REG_S"), %%mm3 \n\t" | |
3569 | 278 "psubd %%mm7, %%mm0 \n\t" |
279 "psubd %%mm7, %%mm1 \n\t" | |
280 "psubd %%mm7, %%mm2 \n\t" | |
281 "psubd %%mm7, %%mm3 \n\t" | |
282 "packssdw %%mm1, %%mm0 \n\t" | |
283 "packssdw %%mm3, %%mm2 \n\t" | |
284 "movq %%mm0, %%mm1 \n\t" | |
285 "punpcklwd %%mm2, %%mm0 \n\t" | |
286 "punpckhwd %%mm2, %%mm1 \n\t" | |
16173 | 287 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
288 "movq %%mm6, (%0, %%"REG_D") \n\t" | |
289 "movd %%mm0, 8(%0, %%"REG_D") \n\t" | |
3569 | 290 "punpckhdq %%mm0, %%mm0 \n\t" |
16173 | 291 "movq %%mm6, 12(%0, %%"REG_D") \n\t" |
292 "movd %%mm0, 20(%0, %%"REG_D") \n\t" | |
293 "movq %%mm6, 24(%0, %%"REG_D") \n\t" | |
294 "movd %%mm1, 32(%0, %%"REG_D") \n\t" | |
3569 | 295 "punpckhdq %%mm1, %%mm1 \n\t" |
16173 | 296 "movq %%mm6, 36(%0, %%"REG_D") \n\t" |
297 "movd %%mm1, 44(%0, %%"REG_D") \n\t" | |
298 "add $16, %%"REG_S" \n\t" | |
3569 | 299 " jnz 1b \n\t" |
300 "emms \n\t" | |
301 :: "r" (s16+1536), "r" (f+256) | |
16173 | 302 :"%"REG_S, "%"REG_D, "memory" |
3569 | 303 ); |
3909 | 304 return 6*256; |
305 } | |
306 | |
/*
 * Converts three consecutive blocks of 256 32-bit values at _f (byte offsets
 * 0, 1024 and 2048) to int16_t and writes 256 frames of 6 int16_t to s16.
 * Frame layout: slot 0 = block at offset 1024, slot 1 = block at offset 2048,
 * slots 2-4 = zero, slot 5 = block at offset 0.
 * Each value has magicF2W subtracted as an integer and is packed to 16 bits
 * with signed saturation. The index register counts from -1024 to 0 in steps
 * of 8 bytes (2 frames per iteration) and is scaled by 3 for the output.
 * The register comments list 16-bit words from most to least significant;
 * lower/upper case letters are the first/second frame of the iteration.
 * mm3 is read by the first punpckldq before it is ever written: only its low
 * half (the "XX") is stale, and the following punpckhdq discards that half.
 * Returns 6*256, the number of int16_t values written.
 * NOTE(review): presumably the block at offset 0 is the LFE channel --
 * confirm against the caller.
 */
307 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
308 int32_t * f = (int32_t *) _f; | |
3576 | 309 asm volatile( |
16173 | 310 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
311 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3576 | 312 "pxor %%mm6, %%mm6 \n\t" |
313 "1: \n\t" | |
16173 | 314 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
315 "movq 2048(%1, %%"REG_S"), %%mm1\n\t" | |
316 "movq (%1, %%"REG_S"), %%mm5 \n\t" | |
3576 | 317 "psubd %%mm7, %%mm0 \n\t" |
318 "psubd %%mm7, %%mm1 \n\t" | |
319 "psubd %%mm7, %%mm5 \n\t" | |
16173 | 320 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
3576 | 321 |
322 "pxor %%mm4, %%mm4 \n\t" | |
323 "packssdw %%mm5, %%mm0 \n\t" // FfAa | |
324 "packssdw %%mm4, %%mm1 \n\t" // 00Bb | |
325 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0 | |
326 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
327 "movq %%mm0, %%mm1 \n\t" // BAba | |
328 "punpckldq %%mm4, %%mm3 \n\t" // f0XX | |
329 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
330 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 | |
331 | |
16173 | 332 "movq %%mm0, (%0, %%"REG_D") \n\t" // 00ba |
3576 | 333 "punpckhdq %%mm4, %%mm0 \n\t" // F000 |
16173 | 334 "movq %%mm3, 8(%0, %%"REG_D") \n\t" // BAf0 |
335 "movq %%mm0, 16(%0, %%"REG_D") \n\t" // F000 | |
336 "add $8, %%"REG_S" \n\t" | |
3576 | 337 " jnz 1b \n\t" |
338 "emms \n\t" | |
339 :: "r" (s16+1536), "r" (f+256) | |
16173 | 340 :"%"REG_S, "%"REG_D, "memory" |
3576 | 341 ); |
3909 | 342 return 6*256; |
343 } | |
344 | |
/*
 * Converts four consecutive blocks of 256 32-bit values at _f (byte offsets
 * 0, 1024, 2048 and 3072) to int16_t and writes 256 frames of 6 int16_t to
 * s16.
 * Frame layout by source block offset: slot 0 = 1024, slot 1 = 3072,
 * slots 2 and 3 = zero, slot 4 = 2048, slot 5 = 0.
 * Each value has magicF2W subtracted as an integer and is packed to 16 bits
 * with signed saturation. The index register counts from -1024 to 0 in steps
 * of 8 bytes (2 frames per iteration) and is scaled by 3 for the output.
 * The register comments list 16-bit words from most to least significant;
 * lower/upper case letters are the first/second frame of the iteration.
 * Returns 6*256, the number of int16_t values written.
 * NOTE(review): presumably the block at offset 0 is the LFE channel --
 * confirm against the caller.
 */
345 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
346 int32_t * f = (int32_t *) _f; | |
3578 | 347 asm volatile( |
16173 | 348 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
349 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3578 | 350 "pxor %%mm6, %%mm6 \n\t" |
351 "1: \n\t" | |
16173 | 352 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
353 "movq 3072(%1, %%"REG_S"), %%mm1\n\t" | |
354 "movq 2048(%1, %%"REG_S"), %%mm4\n\t" | |
355 "movq (%1, %%"REG_S"), %%mm5 \n\t" | |
3578 | 356 "psubd %%mm7, %%mm0 \n\t" |
357 "psubd %%mm7, %%mm1 \n\t" | |
358 "psubd %%mm7, %%mm4 \n\t" | |
359 "psubd %%mm7, %%mm5 \n\t" | |
16173 | 360 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
3578 | 361 |
362 "packssdw %%mm4, %%mm0 \n\t" // EeAa | |
363 "packssdw %%mm5, %%mm1 \n\t" // FfBb | |
364 "movq %%mm0, %%mm2 \n\t" // EeAa | |
365 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
366 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe | |
367 "movq %%mm0, %%mm1 \n\t" // BAba | |
368 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
369 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
370 | |
16173 | 371 "movq %%mm0, (%0, %%"REG_D") \n\t" |
3578 | 372 "punpckhdq %%mm2, %%mm0 \n\t" // FE00 |
373 "punpckldq %%mm1, %%mm2 \n\t" // BAfe | |
16173 | 374 "movq %%mm2, 8(%0, %%"REG_D") \n\t" |
375 "movq %%mm0, 16(%0, %%"REG_D") \n\t" | |
376 "add $8, %%"REG_S" \n\t" | |
3578 | 377 " jnz 1b \n\t" |
378 "emms \n\t" | |
379 :: "r" (s16+1536), "r" (f+256) | |
16173 | 380 :"%"REG_S, "%"REG_D, "memory" |
3578 | 381 ); |
3909 | 382 return 6*256; |
383 } | |
384 | |
/*
 * Converts five consecutive blocks of 256 32-bit values at _f (byte offsets
 * 0, 1024, 2048, 3072 and 4096) to int16_t and writes 256 frames of
 * 6 int16_t to s16.
 * Frame layout by source block offset: slot 0 = 1024, slot 1 = 2048,
 * slot 2 = 3072, slot 3 = 4096, slot 4 = zero, slot 5 = 0.
 * Each value has magicF2W subtracted as an integer and is packed to 16 bits
 * with signed saturation. The index register counts from -1024 to 0 in steps
 * of 8 bytes (2 frames per iteration) and is scaled by 3 for the output.
 * The register comments list 16-bit words from most to least significant;
 * lower/upper case letters are the first/second frame of the iteration.
 * Returns 6*256, the number of int16_t values written.
 * NOTE(review): presumably the block at offset 0 is the LFE channel --
 * confirm against the caller.
 */
385 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
386 int32_t * f = (int32_t *) _f; | |
3577 | 387 asm volatile( |
16173 | 388 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
389 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3577 | 390 // "pxor %%mm6, %%mm6 \n\t" |
391 "1: \n\t" | |
16173 | 392 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
393 "movq 2048(%1, %%"REG_S"), %%mm1\n\t" | |
394 "movq 3072(%1, %%"REG_S"), %%mm2\n\t" | |
395 "movq 4096(%1, %%"REG_S"), %%mm3\n\t" | |
396 "movq (%1, %%"REG_S"), %%mm5 \n\t" | |
3577 | 397 "psubd %%mm7, %%mm0 \n\t" |
398 "psubd %%mm7, %%mm1 \n\t" | |
399 "psubd %%mm7, %%mm2 \n\t" | |
400 "psubd %%mm7, %%mm3 \n\t" | |
401 "psubd %%mm7, %%mm5 \n\t" | |
16173 | 402 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
3577 | 403 |
404 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
405 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
406 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
407 "movq %%mm0, %%mm2 \n\t" // CcAa | |
408 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
409 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
410 "pxor %%mm4, %%mm4 \n\t" // 0000 | |
411 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0 | |
412 "movq %%mm0, %%mm1 \n\t" // BAba | |
413 "movq %%mm4, %%mm3 \n\t" // F0f0 | |
414 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
415 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
416 "punpckldq %%mm1, %%mm4 \n\t" // BAf0 | |
417 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC | |
418 | |
16173 | 419 "movq %%mm0, (%0, %%"REG_D") \n\t" |
420 "movq %%mm4, 8(%0, %%"REG_D") \n\t" | |
421 "movq %%mm2, 16(%0, %%"REG_D") \n\t" | |
422 "add $8, %%"REG_S" \n\t" | |
3577 | 423 " jnz 1b \n\t" |
424 "emms \n\t" | |
425 :: "r" (s16+1536), "r" (f+256) | |
16173 | 426 :"%"REG_S, "%"REG_D, "memory" |
3577 | 427 ); |
3909 | 428 return 6*256; |
429 } | |
430 | |
/*
 * Converts six consecutive blocks of 256 32-bit values at _f (byte offsets
 * 0, 1024, 2048, 3072, 4096 and 5120) to int16_t and writes 256 frames of
 * 6 int16_t to s16.
 * Frame layout by source block offset: slot 0 = 1024, slot 1 = 3072,
 * slot 2 = 4096, slot 3 = 5120, slot 4 = 2048, slot 5 = 0.
 * Each value has magicF2W subtracted as an integer and is packed to 16 bits
 * with signed saturation. The index register counts from -1024 to 0 in steps
 * of 8 bytes (2 frames per iteration) and is scaled by 3 for the output.
 * The register comments list 16-bit words from most to least significant;
 * lower/upper case letters are the first/second frame of the iteration.
 * Returns 6*256, the number of int16_t values written.
 * NOTE(review): presumably the block at offset 0 is the LFE channel and the
 * block at offset 2048 is the center channel -- confirm against the caller.
 */
431 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
432 int32_t * f = (int32_t *) _f; | |
3575 | 433 asm volatile( |
16173 | 434 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
435 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3575 | 436 // "pxor %%mm6, %%mm6 \n\t" |
437 "1: \n\t" | |
16173 | 438 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
439 "movq 3072(%1, %%"REG_S"), %%mm1\n\t" | |
440 "movq 4096(%1, %%"REG_S"), %%mm2\n\t" | |
441 "movq 5120(%1, %%"REG_S"), %%mm3\n\t" | |
442 "movq 2048(%1, %%"REG_S"), %%mm4\n\t" | |
443 "movq (%1, %%"REG_S"), %%mm5 \n\t" | |
3575 | 444 "psubd %%mm7, %%mm0 \n\t" |
445 "psubd %%mm7, %%mm1 \n\t" | |
446 "psubd %%mm7, %%mm2 \n\t" | |
447 "psubd %%mm7, %%mm3 \n\t" | |
448 "psubd %%mm7, %%mm4 \n\t" | |
449 "psubd %%mm7, %%mm5 \n\t" | |
16173 | 450 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
3575 | 451 |
452 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
453 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
454 "packssdw %%mm4, %%mm4 \n\t" // EeEe | |
455 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
456 "movq %%mm0, %%mm2 \n\t" // CcAa | |
457 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
458 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
459 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe | |
460 "movq %%mm0, %%mm1 \n\t" // BAba | |
461 "movq %%mm4, %%mm3 \n\t" // FEfe | |
462 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
463 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
464 "punpckldq %%mm1, %%mm4 \n\t" // BAfe | |
465 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC | |
466 | |
16173 | 467 "movq %%mm0, (%0, %%"REG_D") \n\t" |
468 "movq %%mm4, 8(%0, %%"REG_D") \n\t" | |
469 "movq %%mm2, 16(%0, %%"REG_D") \n\t" | |
470 "add $8, %%"REG_S" \n\t" | |
3575 | 471 " jnz 1b \n\t" |
472 "emms \n\t" | |
473 :: "r" (s16+1536), "r" (f+256) | |
16173 | 474 :"%"REG_S, "%"REG_D, "memory" |
3575 | 475 ); |
3909 | 476 return 6*256; |
477 } | |
478 | |
479 | |
/*
 * Selects the MMX conversion routine for a channel configuration.
 * flags is compared against the A52_* configuration constants (optionally
 * OR-ed with A52_LFE) and ch is the requested number of output channels.
 * A52_CHANNEL, A52_STEREO and A52_DOLBY share the stereo routines.
 * Returns the matching a52_resample_*_MMX function as a void pointer, or NULL
 * when flags is not handled here or ch is not the output channel count that
 * the routine for that configuration produces.
 */
480 static void* a52_resample_MMX(int flags, int ch){ | |
481 switch (flags) { | |
482 case A52_MONO: | |
483 if(ch==5) return a52_resample_MONO_to_5_MMX; | |
484 break; | |
485 case A52_CHANNEL: | |
486 case A52_STEREO: | |
487 case A52_DOLBY: | |
488 if(ch==2) return a52_resample_STEREO_to_2_MMX; | |
489 break; | |
490 case A52_3F: | |
491 if(ch==5) return a52_resample_3F_to_5_MMX; | |
492 break; | |
493 case A52_2F2R: | |
494 if(ch==4) return a52_resample_2F_2R_to_4_MMX; | |
495 break; | |
496 case A52_3F2R: | |
497 if(ch==5) return a52_resample_3F_2R_to_5_MMX; | |
498 break; | |
499 case A52_MONO | A52_LFE: | |
500 if(ch==6) return a52_resample_MONO_LFE_to_6_MMX; | |
501 break; | |
502 case A52_CHANNEL | A52_LFE: | |
503 case A52_STEREO | A52_LFE: | |
504 case A52_DOLBY | A52_LFE: | |
505 if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX; | |
506 break; | |
507 case A52_3F | A52_LFE: | |
508 if(ch==6) return a52_resample_3F_LFE_to_6_MMX; | |
509 break; | |
510 case A52_2F2R | A52_LFE: | |
511 if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX; | |
512 break; | |
513 case A52_3F2R | A52_LFE: | |
514 if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX; | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
515 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
516 } |
3909 | 517 return NULL; |
3626 | 518 } |
519 | |
3909 | 520 |