Mercurial > mplayer.hg
annotate liba52/resample_mmx.c @ 25317:7f3cb5408f28
Fixed VIDIX color bug that was introduced when Radeon VIDIX driver
was synchronized with vidix.sf.net.
The red color was saturating.
The corrected value fixes the issue and restores the color to the level
it used to have before synchronization.
The meaning of the value remains unknown, but it was retrieved from the
register value of a Radeon 9000 card, so it may need further testing.
Patch by Guillaume Lecerf (foxcore at gmail dot com)
author | ben |
---|---|
date | Mon, 10 Dec 2007 19:27:46 +0000 |
parents | d6219ce521e9 |
children | a180e69cc18c |
rev | line source |
---|---|
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
1 |
3569 | 2 // MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL) |
3 | |
4 /* optimization TODO / NOTES | |
5 movntq is slightly faster (0.5% with the current test.c benchmark) | |
6 (but that's just test.c, so that needs to be tested in reality) | |
7 and it would mean (C / MMX2 / MMX / 3DNOW) versions | |
8 */ | |
9 | |
16173 | 10 #include "a52_internal.h" |
11 | |
12 | |
12303
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
/*
 * 64-bit constants loaded into MMX registers by the inline asm below.
 * In this file they are only referenced by name through MANGLE() inside asm
 * strings.  NOTE(review): attribute_used is presumably there so the compiler
 * keeps these otherwise "unreferenced" statics -- confirm in the macro's
 * definition.
 */
/* 0x43c00000 in both 32-bit halves; 0x43c00000 is the bit pattern of 384.0f.
 * It is subtracted (psubd) from the raw bits of every input float. */
13 static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
/* Mask keeping 16-bit words 1 and 3 (counted from the least significant word). */
14 static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
/* Mask keeping 16-bit words 0 and 2 (counted from the least significant word). */
15 static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
/* Mask keeping only the upper 32 bits. */
16 static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL; |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
17 |
/*
 * Convert 256 mono samples into 5-channel interleaved int16 output.
 *
 * _f  : 256 input floats; only their raw 32-bit patterns are read.
 * s16 : output buffer receiving 5*256 int16 values.
 *
 * Every input word has magicF2W (0x43c00000, the bit pattern of 384.0f)
 * subtracted as an integer and is then packed to int16 with signed
 * saturation (packssdw).
 * NOTE(review): this presumably relies on the decoder having biased every
 * sample by 384.0 -- confirm against the caller.
 *
 * Output frame layout: 0, 0, 0, 0, sample.
 * Both pointers are passed one-past-the-end and addressed with a negative
 * index that counts up to zero, so the final add also serves as loop test.
 *
 * Returns the number of int16 values written (5*256).
 */
3909 | 18 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
19 int32_t * f = (int32_t *) _f; |
3574 | 20 asm volatile( |
16173 | 21 "mov $-512, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
22 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
23 "movq "MANGLE(wm1100)", %%mm3 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
24 "movq "MANGLE(wm0101)", %%mm4 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
25 "movq "MANGLE(wm1010)", %%mm5 \n\t" |
3574 | 26 "pxor %%mm6, %%mm6 \n\t" |
27 "1: \n\t" | |
/* Input is addressed with index*2 (16 bytes = 4 floats per iteration),
 * output with index*5 (40 bytes = 4 frames of 5 int16 per iteration). */
16173 | 28 "movq (%1, %%"REG_S", 2), %%mm0 \n\t" |
29 "movq 8(%1, %%"REG_S", 2), %%mm1\n\t" | |
30 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" | |
3574 | 31 "psubd %%mm7, %%mm0 \n\t" |
32 "psubd %%mm7, %%mm1 \n\t" | |
33 "packssdw %%mm1, %%mm0 \n\t" | |
/* mm0 now holds the four samples A B C D (lowest word first); split them
 * into A 0 C 0 (mm0) and 0 B 0 D (mm1). */
34 "movq %%mm0, %%mm1 \n\t" | |
35 "pand %%mm4, %%mm0 \n\t" | |
36 "pand %%mm5, %%mm1 \n\t" | |
16173 | 37 "movq %%mm6, (%0, %%"REG_D") \n\t" // 0 0 0 0 |
38 "movd %%mm0, 8(%0, %%"REG_D") \n\t" // A 0 | |
3574 | 39 "pand %%mm3, %%mm0 \n\t" |
16173 | 40 "movd %%mm6, 12(%0, %%"REG_D") \n\t" // 0 0 |
41 "movd %%mm1, 16(%0, %%"REG_D") \n\t" // 0 B | |
3574 | 42 "pand %%mm3, %%mm1 \n\t" |
16173 | 43 "movd %%mm6, 20(%0, %%"REG_D") \n\t" // 0 0 |
44 "movq %%mm0, 24(%0, %%"REG_D") \n\t" // 0 0 C 0 | |
45 "movq %%mm1, 32(%0, %%"REG_D") \n\t" // 0 0 0 D | |
46 "add $8, %%"REG_S" \n\t" | |
3574 | 47 " jnz 1b \n\t" |
48 "emms \n\t" | |
49 :: "r" (s16+1280), "r" (f+256) | |
16173 | 50 :"%"REG_S, "%"REG_D, "memory" |
3574 | 51 ); |
3909 | 52 return 5*256; |
54 | |
/*
 * Convert two planar channels of 256 samples into interleaved int16 stereo.
 *
 * _f  : two consecutive planes of 256 floats (plane 1 starts 1024 bytes
 *       after plane 0); only the raw 32-bit patterns are read.
 * s16 : output buffer receiving 2*256 int16 values.
 *
 * Every input word has magicF2W (0x43c00000, the bit pattern of 384.0f)
 * subtracted as an integer and is then packed to int16 with signed
 * saturation (packssdw).
 * NOTE(review): this presumably relies on the decoder having biased every
 * sample by 384.0 -- confirm against the caller.
 *
 * Output frame layout: plane0, plane1.  Four frames are produced per loop
 * iteration; the negative byte index runs from -1024 up to 0 and is used
 * for both input and output.
 *
 * Returns the number of int16 values written (2*256).
 */
55 static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ | |
56 int32_t * f = (int32_t *) _f; | |
3567 | 57 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it |
58 #ifdef HAVE_SSE | |
59 asm volatile( | |
16173 | 60 "mov $-1024, %%"REG_S" \n\t" |
3567 | 61 "1: \n\t" |
16173 | 62 "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t" |
63 "cvtps2pi 1024(%1, %%"REG_S"), %%mm2\n\t" | |
3567 | 64 "movq %%mm0, %%mm1 \n\t" |
65 "punpcklwd %%mm2, %%mm0 \n\t" | |
66 "punpckhwd %%mm2, %%mm1 \n\t" | |
16173 | 67 "movq %%mm0, (%0, %%"REG_S") \n\t" |
68 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
69 "add $16, %%"REG_S" \n\t" | |
3567 | 70 " jnz 1b \n\t" |
71 "emms \n\t" | |
72 :: "r" (s16+512), "r" (f+256) | |
16173 | 73 :"%"REG_S, "memory" |
3567 | 74 );*/ |
75 asm volatile( | |
16173 | 76 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
77 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3567 | 78 "1: \n\t" |
16173 | 79 "movq (%1, %%"REG_S"), %%mm0 \n\t" |
80 "movq 8(%1, %%"REG_S"), %%mm1 \n\t" | |
81 "movq 1024(%1, %%"REG_S"), %%mm2\n\t" | |
82 "movq 1032(%1, %%"REG_S"), %%mm3\n\t" | |
3567 | 83 "psubd %%mm7, %%mm0 \n\t" |
84 "psubd %%mm7, %%mm1 \n\t" | |
85 "psubd %%mm7, %%mm2 \n\t" | |
86 "psubd %%mm7, %%mm3 \n\t" | |
87 "packssdw %%mm1, %%mm0 \n\t" | |
88 "packssdw %%mm3, %%mm2 \n\t" | |
/* mm0 = four plane-0 samples, mm2 = four plane-1 samples; interleave them
 * word by word into two quadwords of (plane0, plane1) pairs. */
89 "movq %%mm0, %%mm1 \n\t" | |
90 "punpcklwd %%mm2, %%mm0 \n\t" | |
91 "punpckhwd %%mm2, %%mm1 \n\t" | |
16173 | 92 "movq %%mm0, (%0, %%"REG_S") \n\t" |
93 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
94 "add $16, %%"REG_S" \n\t" | |
3567 | 95 " jnz 1b \n\t" |
96 "emms \n\t" | |
97 :: "r" (s16+512), "r" (f+256) | |
16173 | 98 :"%"REG_S, "memory" |
3567 | 99 ); |
3909 | 100 return 2*256; |
101 } | |
102 | |
/*
 * Convert three planar channels of 256 samples into 5-channel interleaved
 * int16 output.
 *
 * _f  : three consecutive planes of 256 floats (1024 bytes apart); only the
 *       raw 32-bit patterns are read.
 * s16 : output buffer receiving 5*256 int16 values.
 *
 * Every input word has magicF2W (0x43c00000, the bit pattern of 384.0f)
 * subtracted as an integer and is then packed to int16 with signed
 * saturation (packssdw).
 * NOTE(review): this presumably relies on the decoder having biased every
 * sample by 384.0 -- confirm against the caller.
 *
 * Output frame layout: plane0, plane2, 0, 0, plane1.
 * Four frames (40 output bytes) are produced per loop iteration.
 *
 * Returns the number of int16 values written (5*256).
 */
103 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ | |
104 int32_t * f = (int32_t *) _f; | |
3654 | 105 asm volatile( |
16173 | 106 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
107 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3654 | 108 "pxor %%mm6, %%mm6 \n\t" |
/* mm5 = magic value in the low dword only, zero in the high dword; used for
 * registers whose high dword must stay zero after the subtraction. */
109 "movq %%mm7, %%mm5 \n\t" | |
110 "punpckldq %%mm6, %%mm5 \n\t" | |
111 "1: \n\t" | |
16173 | 112 "movd (%1, %%"REG_S"), %%mm0 \n\t" |
113 "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" | |
114 "movd 1024(%1, %%"REG_S"), %%mm1\n\t" | |
115 "punpckldq 4(%1, %%"REG_S"), %%mm1\n\t" | |
116 "movd 2052(%1, %%"REG_S"), %%mm2\n\t" | |
/* mm3 starts as the magic value so that, after psubd, its low dword is 0. */
3654 | 117 "movq %%mm7, %%mm3 \n\t" |
16173 | 118 "punpckldq 1028(%1, %%"REG_S"), %%mm3\n\t" |
119 "movd 8(%1, %%"REG_S"), %%mm4 \n\t" | |
120 "punpckldq 2056(%1, %%"REG_S"), %%mm4\n\t" | |
/* Output byte index = input byte index * 5 / 2 (10 output bytes per frame
 * versus 4 input bytes per sample). */
121 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" | |
122 "sar $1, %%"REG_D" \n\t" | |
3654 | 123 "psubd %%mm7, %%mm0 \n\t" |
124 "psubd %%mm7, %%mm1 \n\t" | |
125 "psubd %%mm5, %%mm2 \n\t" | |
126 "psubd %%mm7, %%mm3 \n\t" | |
127 "psubd %%mm7, %%mm4 \n\t" | |
128 "packssdw %%mm6, %%mm0 \n\t" | |
129 "packssdw %%mm2, %%mm1 \n\t" | |
130 "packssdw %%mm4, %%mm3 \n\t" | |
16173 | 131 "movq %%mm0, (%0, %%"REG_D") \n\t" |
132 "movq %%mm1, 8(%0, %%"REG_D") \n\t" | |
133 "movq %%mm3, 16(%0, %%"REG_D") \n\t" | |
134 "movd 1032(%1, %%"REG_S"), %%mm1\n\t" | |
135 "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" | |
136 "movd 2060(%1, %%"REG_S"), %%mm2\n\t" | |
3654 | 137 "movq %%mm7, %%mm3 \n\t" |
16173 | 138 "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" |
3654 | 139 "pxor %%mm0, %%mm0 \n\t" |
140 "psubd %%mm7, %%mm1 \n\t" | |
141 "psubd %%mm5, %%mm2 \n\t" | |
142 "psubd %%mm7, %%mm3 \n\t" | |
143 "packssdw %%mm1, %%mm0 \n\t" | |
144 "packssdw %%mm3, %%mm2 \n\t" | |
16173 | 145 "movq %%mm0, 24(%0, %%"REG_D") \n\t" |
146 "movq %%mm2, 32(%0, %%"REG_D") \n\t" | |
3654 | 147 |
16173 | 148 "add $16, %%"REG_S" \n\t" |
3654 | 149 " jnz 1b \n\t" |
150 "emms \n\t" | |
151 :: "r" (s16+1280), "r" (f+256) | |
16173 | 152 :"%"REG_S, "%"REG_D, "memory" |
3654 | 153 ); |
3909 | 154 return 5*256; |
155 } | |
156 | |
/*
 * Convert four planar channels of 256 samples into 4-channel interleaved
 * int16 output.
 *
 * _f  : four consecutive planes of 256 floats (1024 bytes apart); only the
 *       raw 32-bit patterns are read.
 * s16 : output buffer receiving 4*256 int16 values.
 *
 * Every input word has magicF2W (0x43c00000, the bit pattern of 384.0f)
 * subtracted as an integer and is then packed to int16 with signed
 * saturation (packssdw).
 * NOTE(review): this presumably relies on the decoder having biased every
 * sample by 384.0 -- confirm against the caller.
 *
 * Output frame layout: plane0, plane1, plane2, plane3.
 * Four frames are produced per loop iteration; the output is addressed with
 * twice the input byte index (8 output bytes per 4 input bytes).
 *
 * Returns the number of int16 values written (4*256).
 */
157 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ | |
158 int32_t * f = (int32_t *) _f; | |
3569 | 159 asm volatile( |
16173 | 160 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
161 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3569 | 162 "1: \n\t" |
16173 | 163 "movq (%1, %%"REG_S"), %%mm0 \n\t" |
164 "movq 8(%1, %%"REG_S"), %%mm1 \n\t" | |
165 "movq 1024(%1, %%"REG_S"), %%mm2\n\t" | |
166 "movq 1032(%1, %%"REG_S"), %%mm3\n\t" | |
3569 | 167 "psubd %%mm7, %%mm0 \n\t" |
168 "psubd %%mm7, %%mm1 \n\t" | |
169 "psubd %%mm7, %%mm2 \n\t" | |
170 "psubd %%mm7, %%mm3 \n\t" | |
171 "packssdw %%mm1, %%mm0 \n\t" | |
172 "packssdw %%mm3, %%mm2 \n\t" | |
16173 | 173 "movq 2048(%1, %%"REG_S"), %%mm3\n\t" |
174 "movq 2056(%1, %%"REG_S"), %%mm4\n\t" | |
175 "movq 3072(%1, %%"REG_S"), %%mm5\n\t" | |
176 "movq 3080(%1, %%"REG_S"), %%mm6\n\t" | |
3569 | 177 "psubd %%mm7, %%mm3 \n\t" |
178 "psubd %%mm7, %%mm4 \n\t" | |
179 "psubd %%mm7, %%mm5 \n\t" | |
180 "psubd %%mm7, %%mm6 \n\t" | |
181 "packssdw %%mm4, %%mm3 \n\t" | |
182 "packssdw %%mm6, %%mm5 \n\t" | |
/* mm0/mm2/mm3/mm5 hold four samples of planes 0/1/2/3.  First interleave
 * words pairwise (0 with 1, 2 with 3), then interleave the resulting dwords
 * to form complete 4-channel frames. */
183 "movq %%mm0, %%mm1 \n\t" | |
184 "movq %%mm3, %%mm4 \n\t" | |
185 "punpcklwd %%mm2, %%mm0 \n\t" | |
186 "punpckhwd %%mm2, %%mm1 \n\t" | |
187 "punpcklwd %%mm5, %%mm3 \n\t" | |
188 "punpckhwd %%mm5, %%mm4 \n\t" | |
189 "movq %%mm0, %%mm2 \n\t" | |
190 "movq %%mm1, %%mm5 \n\t" | |
191 "punpckldq %%mm3, %%mm0 \n\t" | |
192 "punpckhdq %%mm3, %%mm2 \n\t" | |
193 "punpckldq %%mm4, %%mm1 \n\t" | |
194 "punpckhdq %%mm4, %%mm5 \n\t" | |
16173 | 195 "movq %%mm0, (%0, %%"REG_S",2) \n\t" |
196 "movq %%mm2, 8(%0, %%"REG_S",2) \n\t" | |
197 "movq %%mm1, 16(%0, %%"REG_S",2)\n\t" | |
198 "movq %%mm5, 24(%0, %%"REG_S",2)\n\t" | |
199 "add $16, %%"REG_S" \n\t" | |
3569 | 200 " jnz 1b \n\t" |
201 "emms \n\t" | |
202 :: "r" (s16+1024), "r" (f+256) | |
16173 | 203 :"%"REG_S, "memory" |
3569 | 204 ); |
3909 | 205 return 4*256; |
206 } | |
207 | |
/*
 * Convert five planar channels of 256 samples into 5-channel interleaved
 * int16 output.
 *
 * _f  : five consecutive planes of 256 floats (1024 bytes apart); only the
 *       raw 32-bit patterns are read.
 * s16 : output buffer receiving 5*256 int16 values.
 *
 * Every input word has magicF2W (0x43c00000, the bit pattern of 384.0f)
 * subtracted as an integer and is then packed to int16 with signed
 * saturation (packssdw).
 * NOTE(review): this presumably relies on the decoder having biased every
 * sample by 384.0 -- confirm against the caller.
 *
 * Output frame layout: plane0, plane2, plane3, plane4, plane1.
 * Four frames (40 output bytes) are produced per loop iteration.
 *
 * Returns the number of int16 values written (5*256).
 */
208 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ | |
209 int32_t * f = (int32_t *) _f; | |
3653 | 210 asm volatile( |
16173 | 211 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
212 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3653 | 213 "1: \n\t" |
/* Samples are gathered two at a time, already in output order, so that the
 * packssdw instructions below yield finished output quadwords. */
16173 | 214 "movd (%1, %%"REG_S"), %%mm0 \n\t" |
215 "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" | |
216 "movd 3072(%1, %%"REG_S"), %%mm1\n\t" | |
217 "punpckldq 4096(%1, %%"REG_S"), %%mm1\n\t" | |
218 "movd 1024(%1, %%"REG_S"), %%mm2\n\t" | |
219 "punpckldq 4(%1, %%"REG_S"), %%mm2\n\t" | |
220 "movd 2052(%1, %%"REG_S"), %%mm3\n\t" | |
221 "punpckldq 3076(%1, %%"REG_S"), %%mm3\n\t" | |
222 "movd 4100(%1, %%"REG_S"), %%mm4\n\t" | |
223 "punpckldq 1028(%1, %%"REG_S"), %%mm4\n\t" | |
224 "movd 8(%1, %%"REG_S"), %%mm5 \n\t" | |
225 "punpckldq 2056(%1, %%"REG_S"), %%mm5\n\t" | |
/* Output byte index = input byte index * 5 / 2. */
226 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" | |
227 "sar $1, %%"REG_D" \n\t" | |
3653 | 228 "psubd %%mm7, %%mm0 \n\t" |
229 "psubd %%mm7, %%mm1 \n\t" | |
230 "psubd %%mm7, %%mm2 \n\t" | |
231 "psubd %%mm7, %%mm3 \n\t" | |
232 "psubd %%mm7, %%mm4 \n\t" | |
233 "psubd %%mm7, %%mm5 \n\t" | |
234 "packssdw %%mm1, %%mm0 \n\t" | |
235 "packssdw %%mm3, %%mm2 \n\t" | |
236 "packssdw %%mm5, %%mm4 \n\t" | |
16173 | 237 "movq %%mm0, (%0, %%"REG_D") \n\t" |
238 "movq %%mm2, 8(%0, %%"REG_D") \n\t" | |
239 "movq %%mm4, 16(%0, %%"REG_D") \n\t" | |
3653 | 240 |
16173 | 241 "movd 3080(%1, %%"REG_S"), %%mm0\n\t" |
242 "punpckldq 4104(%1, %%"REG_S"), %%mm0\n\t" | |
243 "movd 1032(%1, %%"REG_S"), %%mm1\n\t" | |
244 "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" | |
245 "movd 2060(%1, %%"REG_S"), %%mm2\n\t" | |
246 "punpckldq 3084(%1, %%"REG_S"), %%mm2\n\t" | |
247 "movd 4108(%1, %%"REG_S"), %%mm3\n\t" | |
248 "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" | |
3653 | 249 "psubd %%mm7, %%mm0 \n\t" |
250 "psubd %%mm7, %%mm1 \n\t" | |
251 "psubd %%mm7, %%mm2 \n\t" | |
252 "psubd %%mm7, %%mm3 \n\t" | |
253 "packssdw %%mm1, %%mm0 \n\t" | |
254 "packssdw %%mm3, %%mm2 \n\t" | |
16173 | 255 "movq %%mm0, 24(%0, %%"REG_D") \n\t" |
256 "movq %%mm2, 32(%0, %%"REG_D") \n\t" | |
3653 | 257 |
16173 | 258 "add $16, %%"REG_S" \n\t" |
3653 | 259 " jnz 1b \n\t" |
260 "emms \n\t" | |
261 :: "r" (s16+1280), "r" (f+256) | |
16173 | 262 :"%"REG_S, "%"REG_D, "memory" |
3653 | 263 ); |
3909 | 264 return 5*256; |
265 } | |
266 | |
/*
 * Convert two planar channels of 256 samples into 6-channel interleaved
 * int16 output.
 *
 * _f  : two consecutive planes of 256 floats (1024 bytes apart); only the
 *       raw 32-bit patterns are read.
 * s16 : output buffer receiving 6*256 int16 values.
 *
 * Every input word has magicF2W (0x43c00000, the bit pattern of 384.0f)
 * subtracted as an integer and is then packed to int16 with signed
 * saturation (packssdw).
 * NOTE(review): this presumably relies on the decoder having biased every
 * sample by 384.0 -- confirm against the caller.
 *
 * Output frame layout: 0, 0, 0, 0, plane1, plane0.
 * Four frames (48 output bytes) are produced per loop iteration.
 *
 * Returns the number of int16 values written (6*256).
 */
267 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
268 int32_t * f = (int32_t *) _f; | |
3569 | 269 asm volatile( |
16173 | 270 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
271 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3569 | 272 "pxor %%mm6, %%mm6 \n\t" |
273 "1: \n\t" | |
16173 | 274 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
275 "movq 1032(%1, %%"REG_S"), %%mm1\n\t" | |
276 "movq (%1, %%"REG_S"), %%mm2 \n\t" | |
277 "movq 8(%1, %%"REG_S"), %%mm3 \n\t" | |
3569 | 278 "psubd %%mm7, %%mm0 \n\t" |
279 "psubd %%mm7, %%mm1 \n\t" | |
280 "psubd %%mm7, %%mm2 \n\t" | |
281 "psubd %%mm7, %%mm3 \n\t" | |
282 "packssdw %%mm1, %%mm0 \n\t" | |
283 "packssdw %%mm3, %%mm2 \n\t" | |
/* Interleave into (plane1, plane0) word pairs: frames 0-1 in mm0, frames
 * 2-3 in mm1. */
284 "movq %%mm0, %%mm1 \n\t" | |
285 "punpcklwd %%mm2, %%mm0 \n\t" | |
286 "punpckhwd %%mm2, %%mm1 \n\t" | |
/* Output byte index = input byte index * 3 (12 output bytes per frame). */
16173 | 287 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
/* Each frame: 8 zero bytes (mm6) followed by one (plane1, plane0) pair. */
288 "movq %%mm6, (%0, %%"REG_D") \n\t" | |
289 "movd %%mm0, 8(%0, %%"REG_D") \n\t" | |
3569 | 290 "punpckhdq %%mm0, %%mm0 \n\t" |
16173 | 291 "movq %%mm6, 12(%0, %%"REG_D") \n\t" |
292 "movd %%mm0, 20(%0, %%"REG_D") \n\t" | |
293 "movq %%mm6, 24(%0, %%"REG_D") \n\t" | |
294 "movd %%mm1, 32(%0, %%"REG_D") \n\t" | |
3569 | 295 "punpckhdq %%mm1, %%mm1 \n\t" |
16173 | 296 "movq %%mm6, 36(%0, %%"REG_D") \n\t" |
297 "movd %%mm1, 44(%0, %%"REG_D") \n\t" | |
298 "add $16, %%"REG_S" \n\t" | |
3569 | 299 " jnz 1b \n\t" |
300 "emms \n\t" | |
301 :: "r" (s16+1536), "r" (f+256) | |
16173 | 302 :"%"REG_S, "%"REG_D, "memory" |
3569 | 303 ); |
3909 | 304 return 6*256; |
305 } | |
306 | |
/*
 * Convert three planar channels of 256 samples into 6-channel interleaved
 * int16 output.
 *
 * _f  : three consecutive planes of 256 floats (1024 bytes apart); only the
 *       raw 32-bit patterns are read.
 * s16 : output buffer receiving 6*256 int16 values.
 *
 * Every input word has magicF2W (0x43c00000, the bit pattern of 384.0f)
 * subtracted as an integer and is then packed to int16 with signed
 * saturation (packssdw).
 * NOTE(review): this presumably relies on the decoder having biased every
 * sample by 384.0 -- confirm against the caller.
 *
 * Output frame layout: plane1, plane2, 0, 0, 0, plane0.
 * Two frames (24 output bytes) are produced per loop iteration.
 * The register-content comments list words from most to least significant;
 * lower-case letters belong to the first frame, upper-case to the second.
 *
 * Returns the number of int16 values written (6*256).
 */
307 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
308 int32_t * f = (int32_t *) _f; | |
3576 | 309 asm volatile( |
16173 | 310 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
311 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3576 | 312 "pxor %%mm6, %%mm6 \n\t" |
313 "1: \n\t" | |
16173 | 314 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
315 "movq 2048(%1, %%"REG_S"), %%mm1\n\t" | |
316 "movq (%1, %%"REG_S"), %%mm5 \n\t" | |
3576 | 317 "psubd %%mm7, %%mm0 \n\t" |
318 "psubd %%mm7, %%mm1 \n\t" | |
319 "psubd %%mm7, %%mm5 \n\t" | |
/* Output byte index = input byte index * 3 (12 output bytes per frame). */
16173 | 320 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
3576 | 321 |
322 "pxor %%mm4, %%mm4 \n\t" | |
323 "packssdw %%mm5, %%mm0 \n\t" // FfAa | |
324 "packssdw %%mm4, %%mm1 \n\t" // 00Bb | |
325 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0 | |
326 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
327 "movq %%mm0, %%mm1 \n\t" // BAba | |
/* mm3 is read here without having been initialised; its stale low dword
 * (XX) is dropped again by the punpckhdq two instructions further down. */
328 "punpckldq %%mm4, %%mm3 \n\t" // f0XX | |
329 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
330 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 | |
331 | |
16173 | 332 "movq %%mm0, (%0, %%"REG_D") \n\t" // 00ba |
3576 | 333 "punpckhdq %%mm4, %%mm0 \n\t" // F000 |
16173 | 334 "movq %%mm3, 8(%0, %%"REG_D") \n\t" // BAf0 |
335 "movq %%mm0, 16(%0, %%"REG_D") \n\t" // F000 | |
336 "add $8, %%"REG_S" \n\t" | |
3576 | 337 " jnz 1b \n\t" |
338 "emms \n\t" | |
339 :: "r" (s16+1536), "r" (f+256) | |
16173 | 340 :"%"REG_S, "%"REG_D, "memory" |
3576 | 341 ); |
3909 | 342 return 6*256; |
343 } | |
344 | |
/*
 * Convert four planar channels of 256 samples into 6-channel interleaved
 * int16 output.
 *
 * _f  : four consecutive planes of 256 floats (1024 bytes apart); only the
 *       raw 32-bit patterns are read.
 * s16 : output buffer receiving 6*256 int16 values.
 *
 * Every input word has magicF2W (0x43c00000, the bit pattern of 384.0f)
 * subtracted as an integer and is then packed to int16 with signed
 * saturation (packssdw).
 * NOTE(review): this presumably relies on the decoder having biased every
 * sample by 384.0 -- confirm against the caller.
 *
 * Output frame layout: plane1, plane3, 0, 0, plane2, plane0.
 * Two frames (24 output bytes) are produced per loop iteration.
 * The register-content comments list words from most to least significant;
 * lower-case letters belong to the first frame, upper-case to the second.
 *
 * Returns the number of int16 values written (6*256).
 */
345 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
346 int32_t * f = (int32_t *) _f; | |
3578 | 347 asm volatile( |
16173 | 348 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
349 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3578 | 350 "pxor %%mm6, %%mm6 \n\t" |
351 "1: \n\t" | |
16173 | 352 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
353 "movq 3072(%1, %%"REG_S"), %%mm1\n\t" | |
354 "movq 2048(%1, %%"REG_S"), %%mm4\n\t" | |
355 "movq (%1, %%"REG_S"), %%mm5 \n\t" | |
3578 | 356 "psubd %%mm7, %%mm0 \n\t" |
357 "psubd %%mm7, %%mm1 \n\t" | |
358 "psubd %%mm7, %%mm4 \n\t" | |
359 "psubd %%mm7, %%mm5 \n\t" | |
/* Output byte index = input byte index * 3 (12 output bytes per frame). */
16173 | 360 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
3578 | 361 |
362 "packssdw %%mm4, %%mm0 \n\t" // EeAa | |
363 "packssdw %%mm5, %%mm1 \n\t" // FfBb | |
364 "movq %%mm0, %%mm2 \n\t" // EeAa | |
365 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
366 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe | |
367 "movq %%mm0, %%mm1 \n\t" // BAba | |
368 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
369 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
370 | |
16173 | 371 "movq %%mm0, (%0, %%"REG_D") \n\t" |
3578 | 372 "punpckhdq %%mm2, %%mm0 \n\t" // FE00 |
373 "punpckldq %%mm1, %%mm2 \n\t" // BAfe | |
16173 | 374 "movq %%mm2, 8(%0, %%"REG_D") \n\t" |
375 "movq %%mm0, 16(%0, %%"REG_D") \n\t" | |
376 "add $8, %%"REG_S" \n\t" | |
3578 | 377 " jnz 1b \n\t" |
378 "emms \n\t" | |
379 :: "r" (s16+1536), "r" (f+256) | |
16173 | 380 :"%"REG_S, "%"REG_D, "memory" |
3578 | 381 ); |
3909 | 382 return 6*256; |
383 } | |
384 | |
/*
 * Convert five planar channels of 256 samples into 6-channel interleaved
 * int16 output.
 *
 * _f  : five consecutive planes of 256 floats (1024 bytes apart); only the
 *       raw 32-bit patterns are read.
 * s16 : output buffer receiving 6*256 int16 values.
 *
 * Every input word has magicF2W (0x43c00000, the bit pattern of 384.0f)
 * subtracted as an integer and is then packed to int16 with signed
 * saturation (packssdw).
 * NOTE(review): this presumably relies on the decoder having biased every
 * sample by 384.0 -- confirm against the caller.
 *
 * Output frame layout: plane1, plane2, plane3, plane4, 0, plane0.
 * Two frames (24 output bytes) are produced per loop iteration.
 * The register-content comments list words from most to least significant;
 * lower-case letters belong to the first frame, upper-case to the second.
 *
 * Returns the number of int16 values written (6*256).
 */
385 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
386 int32_t * f = (int32_t *) _f; | |
3577 | 387 asm volatile( |
16173 | 388 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
389 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3577 | 390 // "pxor %%mm6, %%mm6 \n\t" |
391 "1: \n\t" | |
16173 | 392 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
393 "movq 2048(%1, %%"REG_S"), %%mm1\n\t" | |
394 "movq 3072(%1, %%"REG_S"), %%mm2\n\t" | |
395 "movq 4096(%1, %%"REG_S"), %%mm3\n\t" | |
396 "movq (%1, %%"REG_S"), %%mm5 \n\t" | |
3577 | 397 "psubd %%mm7, %%mm0 \n\t" |
398 "psubd %%mm7, %%mm1 \n\t" | |
399 "psubd %%mm7, %%mm2 \n\t" | |
400 "psubd %%mm7, %%mm3 \n\t" | |
401 "psubd %%mm7, %%mm5 \n\t" | |
/* Output byte index = input byte index * 3 (12 output bytes per frame). */
16173 | 402 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
3577 | 403 |
404 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
405 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
406 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
407 "movq %%mm0, %%mm2 \n\t" // CcAa | |
408 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
409 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
410 "pxor %%mm4, %%mm4 \n\t" // 0000 | |
411 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0 | |
412 "movq %%mm0, %%mm1 \n\t" // BAba | |
413 "movq %%mm4, %%mm3 \n\t" // F0f0 | |
414 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
415 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
416 "punpckldq %%mm1, %%mm4 \n\t" // BAf0 | |
417 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC | |
418 | |
16173 | 419 "movq %%mm0, (%0, %%"REG_D") \n\t" |
420 "movq %%mm4, 8(%0, %%"REG_D") \n\t" | |
421 "movq %%mm2, 16(%0, %%"REG_D") \n\t" | |
422 "add $8, %%"REG_S" \n\t" | |
3577 | 423 " jnz 1b \n\t" |
424 "emms \n\t" | |
425 :: "r" (s16+1536), "r" (f+256) | |
16173 | 426 :"%"REG_S, "%"REG_D, "memory" |
3577 | 427 ); |
3909 | 428 return 6*256; |
429 } | |
430 | |
/*
 * Convert six planar channels of 256 samples into 6-channel interleaved
 * int16 output.
 *
 * _f  : six consecutive planes of 256 floats (1024 bytes apart); only the
 *       raw 32-bit patterns are read.
 * s16 : output buffer receiving 6*256 int16 values.
 *
 * Every input word has magicF2W (0x43c00000, the bit pattern of 384.0f)
 * subtracted as an integer and is then packed to int16 with signed
 * saturation (packssdw).
 * NOTE(review): this presumably relies on the decoder having biased every
 * sample by 384.0 -- confirm against the caller.
 *
 * Output frame layout: plane1, plane3, plane4, plane5, plane2, plane0.
 * Two frames (24 output bytes) are produced per loop iteration.
 * The register-content comments list words from most to least significant;
 * lower-case letters belong to the first frame, upper-case to the second.
 *
 * Returns the number of int16 values written (6*256).
 */
431 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
432 int32_t * f = (int32_t *) _f; | |
3575 | 433 asm volatile( |
16173 | 434 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
435 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3575 | 436 // "pxor %%mm6, %%mm6 \n\t" |
437 "1: \n\t" | |
16173 | 438 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
439 "movq 3072(%1, %%"REG_S"), %%mm1\n\t" | |
440 "movq 4096(%1, %%"REG_S"), %%mm2\n\t" | |
441 "movq 5120(%1, %%"REG_S"), %%mm3\n\t" | |
442 "movq 2048(%1, %%"REG_S"), %%mm4\n\t" | |
443 "movq (%1, %%"REG_S"), %%mm5 \n\t" | |
3575 | 444 "psubd %%mm7, %%mm0 \n\t" |
445 "psubd %%mm7, %%mm1 \n\t" | |
446 "psubd %%mm7, %%mm2 \n\t" | |
447 "psubd %%mm7, %%mm3 \n\t" | |
448 "psubd %%mm7, %%mm4 \n\t" | |
449 "psubd %%mm7, %%mm5 \n\t" | |
/* Output byte index = input byte index * 3 (12 output bytes per frame). */
16173 | 450 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
3575 | 451 |
452 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
453 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
454 "packssdw %%mm4, %%mm4 \n\t" // EeEe | |
455 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
456 "movq %%mm0, %%mm2 \n\t" // CcAa | |
457 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
458 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
459 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe | |
460 "movq %%mm0, %%mm1 \n\t" // BAba | |
461 "movq %%mm4, %%mm3 \n\t" // FEfe | |
462 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
463 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
464 "punpckldq %%mm1, %%mm4 \n\t" // BAfe | |
465 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC | |
466 | |
16173 | 467 "movq %%mm0, (%0, %%"REG_D") \n\t" |
468 "movq %%mm4, 8(%0, %%"REG_D") \n\t" | |
469 "movq %%mm2, 16(%0, %%"REG_D") \n\t" | |
470 "add $8, %%"REG_S" \n\t" | |
3575 | 471 " jnz 1b \n\t" |
472 "emms \n\t" | |
473 :: "r" (s16+1536), "r" (f+256) | |
16173 | 474 :"%"REG_S, "%"REG_D, "memory" |
3575 | 475 ); |
3909 | 476 return 6*256; |
477 } | |
478 | |
479 | |
480 static void* a52_resample_MMX(int flags, int ch){ | |
481 switch (flags) { | |
482 case A52_MONO: | |
483 if(ch==5) return a52_resample_MONO_to_5_MMX; | |
484 break; | |
485 case A52_CHANNEL: | |
486 case A52_STEREO: | |
487 case A52_DOLBY: | |
488 if(ch==2) return a52_resample_STEREO_to_2_MMX; | |
489 break; | |
490 case A52_3F: | |
491 if(ch==5) return a52_resample_3F_to_5_MMX; | |
492 break; | |
493 case A52_2F2R: | |
494 if(ch==4) return a52_resample_2F_2R_to_4_MMX; | |
495 break; | |
496 case A52_3F2R: | |
497 if(ch==5) return a52_resample_3F_2R_to_5_MMX; | |
498 break; | |
499 case A52_MONO | A52_LFE: | |
500 if(ch==6) return a52_resample_MONO_LFE_to_6_MMX; | |
501 break; | |
502 case A52_CHANNEL | A52_LFE: | |
503 case A52_STEREO | A52_LFE: | |
504 case A52_DOLBY | A52_LFE: | |
505 if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX; | |
506 break; | |
507 case A52_3F | A52_LFE: | |
508 if(ch==6) return a52_resample_3F_LFE_to_6_MMX; | |
509 break; | |
510 case A52_2F2R | A52_LFE: | |
511 if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX; | |
512 break; | |
513 case A52_3F2R | A52_LFE: | |
514 if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX; | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
515 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
516 } |
3909 | 517 return NULL; |
3626 | 518 } |
519 | |
3909 | 520 |