changeset 1261:362947395f5c libavcodec

fastdiv patch by (BERO <bero at geocities dot co dot jp>) with fixes & cleanup by me
author michaelni
date Wed, 14 May 2003 15:12:13 +0000
parents bc68a29d0dd1
children 82e0e1b9c283
files common.h h263.c i386/mpegvideo_mmx.c msmpeg4.c
diffstat 4 files changed, 26 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/common.h	Wed May 14 12:32:17 2003 +0000
+++ b/common.h	Wed May 14 15:12:13 2003 +0000
@@ -197,6 +197,25 @@
 #define FFMAX(a,b) ((a) > (b) ? (a) : (b))
 #define FFMIN(a,b) ((a) > (b) ? (b) : (a))
 
+extern const uint32_t inverse[256]; /* per-divisor multipliers (presumably 2^32/b rounded up - confirm at the definition) */
+/* FASTDIV(a,b): a/b as the upper 32 bits of a*inverse[b]; needs b < 256; multiply variants treat a as unsigned */
+#ifdef ARCH_X86
+#    define FASTDIV(a,b) \
+    ({\
+        int ret,dmy; /* dmy only absorbs the low half left in eax */\
+        asm volatile(\
+            "mull %3"\
+            :"=d"(ret),"=a"(dmy)\
+            :"1"(a),"rm"(inverse[b]) /* "rm", not "g": mull cannot take an immediate */\
+            );\
+        ret;\
+    })
+#elif defined(CONFIG_FASTDIV)
+#    define FASTDIV(a,b)   ((uint32_t)((((uint64_t)(a))*inverse[b])>>32))
+#else
+#    define FASTDIV(a,b)   ((a)/(b)) /* plain (signed) division fallback */
+#endif
+ 
 #ifdef ARCH_X86
 // avoid +32 for shift optimization (gcc should do that ...)
 static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
--- a/h263.c	Wed May 14 12:32:17 2003 +0000
+++ b/h263.c	Wed May 14 15:12:13 2003 +0000
@@ -73,8 +73,6 @@
 static void mpeg4_decode_sprite_trajectory(MpegEncContext * s);
 static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr, int *dir_ptr);
 
-extern uint32_t inverse[256];
-
 #ifdef CONFIG_ENCODERS
 static uint8_t uni_DCtab_lum_len[512];
 static uint8_t uni_DCtab_chrom_len[512];
@@ -1823,7 +1821,6 @@
 {
     int a, b, c, wrap, pred, scale;
     uint16_t *dc_val;
-    int dummy;
 
     /* find prediction */
     if (n < 4) {
@@ -1859,16 +1856,7 @@
         *dir_ptr = 0; /* left */
     }
     /* we assume pred is positive */
-#ifdef ARCH_X86
-	asm volatile (
-		"xorl %%edx, %%edx	\n\t"
-		"mul %%ecx		\n\t"
-		: "=d" (pred), "=a"(dummy)
-		: "a" (pred + (scale >> 1)), "c" (inverse[scale])
-	);
-#else
-    pred = (pred + (scale >> 1)) / scale;
-#endif
+    pred = FASTDIV((pred + (scale >> 1)), scale);
 
     /* prepare address for prediction update */
     *dc_val_ptr = &dc_val[0];
@@ -3668,8 +3656,8 @@
 	/* DC coef */
         if(s->partitioned_frame){
             level = s->dc_val[0][ s->block_index[n] ];
-            if(n<4) level= (level + (s->y_dc_scale>>1))/s->y_dc_scale; //FIXME optimizs
-            else    level= (level + (s->c_dc_scale>>1))/s->c_dc_scale;
+            if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale);
+            else    level= FASTDIV((level + (s->c_dc_scale>>1)), s->c_dc_scale);
             dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_stride]<<n)&32;
         }else{
             level = mpeg4_decode_dc(s, n, &dc_pred_dir);
--- a/i386/mpegvideo_mmx.c	Wed May 14 12:32:17 2003 +0000
+++ b/i386/mpegvideo_mmx.c	Wed May 14 15:12:13 2003 +0000
@@ -26,7 +26,6 @@
 
 extern uint8_t zigzag_direct_noperm[64];
 extern uint16_t inv_zigzag_direct16[64];
-extern uint32_t inverse[256];
 
 static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
 static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
--- a/msmpeg4.c	Wed May 14 12:32:17 2003 +0000
+++ b/msmpeg4.c	Wed May 14 15:12:13 2003 +0000
@@ -78,8 +78,6 @@
 static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
 static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
 
-extern uint32_t inverse[256];
-
 
 #ifdef DEBUG
 int intra_count = 0;
@@ -699,7 +697,7 @@
             sum+=src[x + y*stride];
         }
     }
-    return (sum + (scale>>1))/scale;
+    return FASTDIV((sum + (scale>>1)), scale);
 }
 
 /* dir = 0: left, dir = 1: top prediction */
@@ -763,9 +761,9 @@
 	b = (b + (8 >> 1)) / 8;
 	c = (c + (8 >> 1)) / 8;
     } else {
-	a = (a + (scale >> 1)) / scale;
-	b = (b + (scale >> 1)) / scale;
-	c = (c + (scale >> 1)) / scale;
+	a = FASTDIV((a + (scale >> 1)), scale);
+	b = FASTDIV((b + (scale >> 1)), scale);
+	c = FASTDIV((c + (scale >> 1)), scale);
     }
 #endif
     /* XXX: WARNING: they did not choose the same test as MPEG4. This