Mercurial > libavcodec.hg
changeset 2749:7bdf1186573b libavcodec
get rid of 2 movq (680 -> 658 dezicycles on duron)
author | michael |
---|---|
date | Wed, 01 Jun 2005 11:36:32 +0000 |
parents | 135aa7d0bd17 |
children | 704c284c827a |
files | i386/idct_mmx.c |
diffstat | 1 files changed, 15 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/i386/idct_mmx.c Wed Jun 01 08:43:40 2005 +0000
+++ b/i386/idct_mmx.c Wed Jun 01 11:36:32 2005 +0000
@@ -614,19 +614,17 @@
 "psubw "#a", "#b" \n\t"\
 "psubw "#c", "#d" \n\t"
 
-/* in: a,b out: a,s */
-#define SUMSUBD2_AB( a, b, t, s ) \
- "movq "#a", "#s" \n\t"\
+#define SUMSUBD2_AB( a, b, t ) \
 "movq "#b", "#t" \n\t"\
 "psraw $1 , "#b" \n\t"\
- "psraw $1 , "#s" \n\t"\
- "paddw "#b", "#a" \n\t"\
- "psubw "#t", "#s" \n\t"
+ "paddw "#a", "#b" \n\t"\
+ "psraw $1 , "#a" \n\t"\
+ "psubw "#t", "#a" \n\t"
 
-#define IDCT4_1D( s02, s13, d02, d13, t, u ) \
+#define IDCT4_1D( s02, s13, d02, d13, t ) \
 SUMSUB_BA ( s02, d02 )\
- SUMSUBD2_AB( s13, d13, u, t )\
- SUMSUB_BADC( s13, s02, t, d02 )
+ SUMSUBD2_AB( s13, d13, t )\
+ SUMSUB_BADC( d13, s02, s13, d02 )
 
 #define SBUTTERFLY( a, b, t, n ) \
 "movq "#a", "#t" \n\t" /* abcd */\
@@ -662,22 +660,22 @@
 
 asm volatile(
 /* mm1=s02+s13 mm2=s02-s13 mm4=d02+d13 mm0=d02-d13 */
- IDCT4_1D( %%mm2, %%mm1, %%mm0, %%mm3, %%mm4, %%mm5 )
+ IDCT4_1D( %%mm2, %%mm1, %%mm0, %%mm3, %%mm4 )
 
 "movq %0, %%mm6 \n\t"
 /* in: 1,4,0,2 out: 1,2,3,0 */
- TRANSPOSE4( %%mm1, %%mm4, %%mm0, %%mm2, %%mm3 )
+ TRANSPOSE4( %%mm3, %%mm1, %%mm0, %%mm2, %%mm4 )
 
- "paddw %%mm6, %%mm1 \n\t"
+ "paddw %%mm6, %%mm3 \n\t"
 
 /* mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13 */
- IDCT4_1D( %%mm3, %%mm2, %%mm1, %%mm0, %%mm4, %%mm5 )
+ IDCT4_1D( %%mm4, %%mm2, %%mm3, %%mm0, %%mm1 )
 
 "pxor %%mm7, %%mm7 \n\t"
 :: "m"(ff_pw_32));
 
- STORE_DIFF_4P( %%mm2, %%mm0, %%mm7, &dst[0*stride] );
- STORE_DIFF_4P( %%mm4, %%mm0, %%mm7, &dst[1*stride] );
- STORE_DIFF_4P( %%mm1, %%mm0, %%mm7, &dst[2*stride] );
- STORE_DIFF_4P( %%mm3, %%mm0, %%mm7, &dst[3*stride] );
+ STORE_DIFF_4P( %%mm0, %%mm1, %%mm7, &dst[0*stride] );
+ STORE_DIFF_4P( %%mm2, %%mm1, %%mm7, &dst[1*stride] );
+ STORE_DIFF_4P( %%mm3, %%mm1, %%mm7, &dst[2*stride] );
+ STORE_DIFF_4P( %%mm4, %%mm1, %%mm7, &dst[3*stride] );
 }