Mercurial > libavcodec.hg
comparison i386/snowdsp_mmx.c @ 5553:3c0c9723ed3c libavcodec
And of course the unneeded double subtractions were blindly put in the
MMX code.
This also makes the affected code 4% faster.
author | michael |
---|---|
date | Mon, 20 Aug 2007 22:29:21 +0000 |
parents | d5ba514e3f4a |
children | a6475d1a9ea0 |
comparison
equal
deleted
inserted
replaced
5552:8dcb8c89a661 | 5553:3c0c9723ed3c |
---|---|
292 | 292 |
293 { // Lift 2 | 293 { // Lift 2 |
294 DWTELEM * const ref = b+w2 - 1; | 294 DWTELEM * const ref = b+w2 - 1; |
295 | 295 |
296 i = 1; | 296 i = 1; |
297 b[0] = b[0] - (((-2 * ref[1] + W_BO) - 4 * b[0]) >> W_BS); | 297 b[0] = b[0] + (((2 * ref[1] + W_BO-1) + 4 * b[0]) >> W_BS); |
298 asm volatile( | 298 asm volatile( |
299 "pslld $1, %%mm7 \n\t" /* xmm7 already holds a '4' from 2 lifts ago. */ | 299 "pcmpeqd %%mm7, %%mm7 \n\t" |
| | 300 "psrld $29, %%mm7 \n\t" |
300 ::); | 301 ::); |
301 for(; i<w_l-3; i+=4){ | 302 for(; i<w_l-3; i+=4){ |
302 asm volatile( | 303 asm volatile( |
303 "movq (%1), %%mm0 \n\t" | 304 "movq (%1), %%mm0 \n\t" |
304 "movq 8(%1), %%mm4 \n\t" | 305 "movq 8(%1), %%mm4 \n\t" |
305 "paddd 4(%1), %%mm0 \n\t" | 306 "paddd 4(%1), %%mm0 \n\t" |
306 "paddd 12(%1), %%mm4 \n\t" | 307 "paddd 12(%1), %%mm4 \n\t" |
307 "movq %%mm7, %%mm1 \n\t" | 308 "paddd %%mm7, %%mm0 \n\t" |
308 "movq %%mm7, %%mm5 \n\t" | 309 "paddd %%mm7, %%mm4 \n\t" |
309 "psubd %%mm0, %%mm1 \n\t" | 310 "psrad $2, %%mm0 \n\t" |
310 "psubd %%mm4, %%mm5 \n\t" | 311 "psrad $2, %%mm4 \n\t" |
311 "movq (%0), %%mm0 \n\t" | 312 "movq (%0), %%mm1 \n\t" |
312 "movq 8(%0), %%mm4 \n\t" | 313 "movq 8(%0), %%mm5 \n\t" |
313 "pslld $2, %%mm0 \n\t" | 314 "paddd %%mm1, %%mm0 \n\t" |
314 "pslld $2, %%mm4 \n\t" | 315 "paddd %%mm5, %%mm4 \n\t" |
315 "psubd %%mm0, %%mm1 \n\t" | 316 "psrad $2, %%mm0 \n\t" |
316 "psubd %%mm4, %%mm5 \n\t" | 317 "psrad $2, %%mm4 \n\t" |
317 "psrad $4, %%mm1 \n\t" | 318 "paddd %%mm1, %%mm0 \n\t" |
318 "psrad $4, %%mm5 \n\t" | 319 "paddd %%mm5, %%mm4 \n\t" |
319 "movq (%0), %%mm0 \n\t" | |
320 "movq 8(%0), %%mm4 \n\t" | |
321 "psubd %%mm1, %%mm0 \n\t" | |
322 "psubd %%mm5, %%mm4 \n\t" | |
323 "movq %%mm0, (%0) \n\t" | 320 "movq %%mm0, (%0) \n\t" |
324 "movq %%mm4, 8(%0) \n\t" | 321 "movq %%mm4, 8(%0) \n\t" |
325 :: "r"(&b[i]), "r"(&ref[i]) | 322 :: "r"(&b[i]), "r"(&ref[i]) |
326 : "memory" | 323 : "memory" |
327 ); | 324 ); |