changeset 2746:d04ce91101cf libavcodec

add rounding bias before the horizontal idct (765->730 dezicycles on duron)
author michael
date Wed, 01 Jun 2005 01:18:41 +0000
parents 42d3e9068e32
children 6eded34ab57b
files i386/idct_mmx.c
diffstat 1 files changed, 3 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/i386/idct_mmx.c	Tue May 31 22:48:33 2005 +0000
+++ b/i386/idct_mmx.c	Wed Jun 01 01:18:41 2005 +0000
@@ -641,7 +641,6 @@
 
 #define STORE_DIFF_4P( p, t, pw32, z, dst ) \
     asm volatile(\
-        "paddw     "#pw32", "#p" \n\t"\
         "psraw      $6,     "#p" \n\t"\
         "movd       (%0),   "#t" \n\t"\
         "punpcklbw "#z",    "#t" \n\t"\
@@ -665,9 +664,12 @@
         /* mm1=s02+s13  mm2=s02-s13  mm4=d02+d13  mm0=d02-d13 */
         IDCT4_1D( %%mm2, %%mm1, %%mm0, %%mm3, %%mm4, %%mm5 )
 
+        "movq     ff_pw_32, %%mm6 \n\t"
         /* in: 1,4,0,2  out: 1,2,3,0 */
         TRANSPOSE4( %%mm1, %%mm4, %%mm0, %%mm2, %%mm3 )
 
+        "paddw     %%mm6, %%mm1 \n\t"
+
         /* mm2=s02+s13  mm3=s02-s13  mm4=d02+d13  mm1=d02-d13 */
         IDCT4_1D( %%mm3, %%mm2, %%mm1, %%mm0, %%mm4, %%mm5 )
 
@@ -675,7 +677,6 @@
         TRANSPOSE4( %%mm2, %%mm4, %%mm1, %%mm3, %%mm0 )
 
         "pxor %%mm7, %%mm7    \n\t"
-        "movq ff_pw_32, %%mm6 \n\t"
     :: );
 
     STORE_DIFF_4P( %%mm2, %%mm4, %%mm6, %%mm7, &dst[0*stride] );