comparison liba52/resample.c @ 3654:33c3cff374a1

SSE optimization of 3F; bugfix in 3F2R
author michael
date Sat, 22 Dec 2001 00:55:58 +0000
parents b11b15df02ed
children 6312aa265429
comparison
equal deleted inserted replaced
3653:b11b15df02ed 3654:33c3cff374a1
224 "emms \n\t" 224 "emms \n\t"
225 :: "r" (s16+512), "r" (f+256) 225 :: "r" (s16+512), "r" (f+256)
226 :"%esi", "memory" 226 :"%esi", "memory"
227 ); 227 );
228 break; 228 break;
229 case A52_3F: //FIXME Optimize 229 case A52_3F:
230 for (i = 0; i < 256; i++) { 230 asm volatile(
231 s16[5*i] = convert (f[i]); 231 "movl $-1024, %%esi \n\t"
232 s16[5*i+1] = convert (f[i+512]); 232 "movq magicF2W, %%mm7 \n\t"
233 s16[5*i+2] = s16[5*i+3] = 0; 233 "pxor %%mm6, %%mm6 \n\t"
234 s16[5*i+4] = convert (f[i+256]); 234 "movq %%mm7, %%mm5 \n\t"
235 } 235 "punpckldq %%mm6, %%mm5 \n\t"
236 "1: \n\t"
237 "movd (%1, %%esi), %%mm0 \n\t"
238 "punpckldq 2048(%1, %%esi), %%mm0\n\t"
239 "movd 1024(%1, %%esi), %%mm1 \n\t"
240 "punpckldq 4(%1, %%esi), %%mm1 \n\t"
241 "movd 2052(%1, %%esi), %%mm2 \n\t"
242 "movq %%mm7, %%mm3 \n\t"
243 "punpckldq 1028(%1, %%esi), %%mm3\n\t"
244 "movd 8(%1, %%esi), %%mm4 \n\t"
245 "punpckldq 2056(%1, %%esi), %%mm4\n\t"
246 "leal (%%esi, %%esi, 4), %%edi \n\t"
247 "sarl $1, %%edi \n\t"
248 "psubd %%mm7, %%mm0 \n\t"
249 "psubd %%mm7, %%mm1 \n\t"
250 "psubd %%mm5, %%mm2 \n\t"
251 "psubd %%mm7, %%mm3 \n\t"
252 "psubd %%mm7, %%mm4 \n\t"
253 "packssdw %%mm6, %%mm0 \n\t"
254 "packssdw %%mm2, %%mm1 \n\t"
255 "packssdw %%mm4, %%mm3 \n\t"
256 "movq %%mm0, (%0, %%edi) \n\t"
257 "movq %%mm1, 8(%0, %%edi) \n\t"
258 "movq %%mm3, 16(%0, %%edi) \n\t"
259
260 "movd 1032(%1, %%esi), %%mm1 \n\t"
261 "punpckldq 12(%1, %%esi), %%mm1\n\t"
262 "movd 2060(%1, %%esi), %%mm2 \n\t"
263 "movq %%mm7, %%mm3 \n\t"
264 "punpckldq 1036(%1, %%esi), %%mm3\n\t"
265 "pxor %%mm0, %%mm0 \n\t"
266 "psubd %%mm7, %%mm1 \n\t"
267 "psubd %%mm5, %%mm2 \n\t"
268 "psubd %%mm7, %%mm3 \n\t"
269 "packssdw %%mm1, %%mm0 \n\t"
270 "packssdw %%mm3, %%mm2 \n\t"
271 "movq %%mm0, 24(%0, %%edi) \n\t"
272 "movq %%mm2, 32(%0, %%edi) \n\t"
273
274 "addl $16, %%esi \n\t"
275 " jnz 1b \n\t"
276 "emms \n\t"
277 :: "r" (s16+1280), "r" (f+256)
278 :"%esi", "%edi", "memory"
279 );
236 break; 280 break;
237 case A52_2F2R: 281 case A52_2F2R:
238 asm volatile( 282 asm volatile(
239 "movl $-1024, %%esi \n\t" 283 "movl $-1024, %%esi \n\t"
240 "movq magicF2W, %%mm7 \n\t" 284 "movq magicF2W, %%mm7 \n\t"
326 "psubd %%mm7, %%mm1 \n\t" 370 "psubd %%mm7, %%mm1 \n\t"
327 "psubd %%mm7, %%mm2 \n\t" 371 "psubd %%mm7, %%mm2 \n\t"
328 "psubd %%mm7, %%mm3 \n\t" 372 "psubd %%mm7, %%mm3 \n\t"
329 "packssdw %%mm1, %%mm0 \n\t" 373 "packssdw %%mm1, %%mm0 \n\t"
330 "packssdw %%mm3, %%mm2 \n\t" 374 "packssdw %%mm3, %%mm2 \n\t"
331 "packssdw %%mm5, %%mm4 \n\t"
332 "movq %%mm0, 24(%0, %%edi) \n\t" 375 "movq %%mm0, 24(%0, %%edi) \n\t"
333 "movq %%mm2, 32(%0, %%edi) \n\t" 376 "movq %%mm2, 32(%0, %%edi) \n\t"
334 "movq %%mm4, 40(%0, %%edi) \n\t"
335 377
336 "addl $16, %%esi \n\t" 378 "addl $16, %%esi \n\t"
337 " jnz 1b \n\t" 379 " jnz 1b \n\t"
338 "emms \n\t" 380 "emms \n\t"
339 :: "r" (s16+1280), "r" (f+256) 381 :: "r" (s16+1280), "r" (f+256)