comparison i386/snowdsp_mmx.c @ 5553:3c0c9723ed3c libavcodec

And of course the unneeded double subtractions were blindly put in the MMX code too. Removing them also makes the affected code 4% faster.
author michael
date Mon, 20 Aug 2007 22:29:21 +0000
parents d5ba514e3f4a
children a6475d1a9ea0
comparison
equal deleted inserted replaced
5552:8dcb8c89a661 5553:3c0c9723ed3c
292 292
293 { // Lift 2 293 { // Lift 2
294 DWTELEM * const ref = b+w2 - 1; 294 DWTELEM * const ref = b+w2 - 1;
295 295
296 i = 1; 296 i = 1;
297 b[0] = b[0] - (((-2 * ref[1] + W_BO) - 4 * b[0]) >> W_BS); 297 b[0] = b[0] + (((2 * ref[1] + W_BO-1) + 4 * b[0]) >> W_BS);
298 asm volatile( 298 asm volatile(
299 "pslld $1, %%mm7 \n\t" /* xmm7 already holds a '4' from 2 lifts ago. */ 299 "pcmpeqd %%mm7, %%mm7 \n\t"
300 "psrld $29, %%mm7 \n\t"
300 ::); 301 ::);
301 for(; i<w_l-3; i+=4){ 302 for(; i<w_l-3; i+=4){
302 asm volatile( 303 asm volatile(
303 "movq (%1), %%mm0 \n\t" 304 "movq (%1), %%mm0 \n\t"
304 "movq 8(%1), %%mm4 \n\t" 305 "movq 8(%1), %%mm4 \n\t"
305 "paddd 4(%1), %%mm0 \n\t" 306 "paddd 4(%1), %%mm0 \n\t"
306 "paddd 12(%1), %%mm4 \n\t" 307 "paddd 12(%1), %%mm4 \n\t"
307 "movq %%mm7, %%mm1 \n\t" 308 "paddd %%mm7, %%mm0 \n\t"
308 "movq %%mm7, %%mm5 \n\t" 309 "paddd %%mm7, %%mm4 \n\t"
309 "psubd %%mm0, %%mm1 \n\t" 310 "psrad $2, %%mm0 \n\t"
310 "psubd %%mm4, %%mm5 \n\t" 311 "psrad $2, %%mm4 \n\t"
311 "movq (%0), %%mm0 \n\t" 312 "movq (%0), %%mm1 \n\t"
312 "movq 8(%0), %%mm4 \n\t" 313 "movq 8(%0), %%mm5 \n\t"
313 "pslld $2, %%mm0 \n\t" 314 "paddd %%mm1, %%mm0 \n\t"
314 "pslld $2, %%mm4 \n\t" 315 "paddd %%mm5, %%mm4 \n\t"
315 "psubd %%mm0, %%mm1 \n\t" 316 "psrad $2, %%mm0 \n\t"
316 "psubd %%mm4, %%mm5 \n\t" 317 "psrad $2, %%mm4 \n\t"
317 "psrad $4, %%mm1 \n\t" 318 "paddd %%mm1, %%mm0 \n\t"
318 "psrad $4, %%mm5 \n\t" 319 "paddd %%mm5, %%mm4 \n\t"
319 "movq (%0), %%mm0 \n\t"
320 "movq 8(%0), %%mm4 \n\t"
321 "psubd %%mm1, %%mm0 \n\t"
322 "psubd %%mm5, %%mm4 \n\t"
323 "movq %%mm0, (%0) \n\t" 320 "movq %%mm0, (%0) \n\t"
324 "movq %%mm4, 8(%0) \n\t" 321 "movq %%mm4, 8(%0) \n\t"
325 :: "r"(&b[i]), "r"(&ref[i]) 322 :: "r"(&b[i]), "r"(&ref[i])
326 : "memory" 323 : "memory"
327 ); 324 );