diff i386/dsputil_mmx.c @ 3215:06f98047ff26 libavcodec

prefetch pixels for future motion compensation. 2-5% faster h264.
author lorenm
date Thu, 23 Mar 2006 20:16:36 +0000
parents 57d31bdbebe8
children 7aa9f80e7954
line wrap: on
line diff
--- a/i386/dsputil_mmx.c	Wed Mar 22 22:08:28 2006 +0000
+++ b/i386/dsputil_mmx.c	Thu Mar 23 20:16:36 2006 +0000
@@ -2489,6 +2489,18 @@
     }
 }
 
+#define PREFETCH(name, op) /* defines name(): issue the cache-hint instruction op on h addresses, stride bytes apart, starting at mem */ \
+void name(void *mem, int stride, int h){ /* mem: first address; stride: byte step added after each hint; h: number of hints to issue */ \
+    const uint8_t *p= mem;\
+    while(h-- > 0){ /* tested before the body so that h <= 0 issues nothing instead of running until h wraps around */ \
+        asm volatile(#op" %0" :: "m"(*p)); /* #op stringizes the mnemonic; "m"(*p) passes the current address as a memory operand */ \
+        p+= stride;\
+    }\
+}
+PREFETCH(prefetch_mmx2,  prefetcht0)
+PREFETCH(prefetch_3dnow, prefetch)
+#undef PREFETCH
+
 #include "h264dsp_mmx.c"
 
 /* external functions, from idct_mmx.c */
@@ -2749,6 +2761,8 @@
         c->h264_idct8_add= ff_h264_idct8_add_mmx;
 
         if (mm_flags & MM_MMXEXT) {
+            c->prefetch = prefetch_mmx2;
+
             c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
             c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
 
@@ -2879,6 +2893,8 @@
             c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
 #endif //CONFIG_ENCODERS
         } else if (mm_flags & MM_3DNOW) {
+            c->prefetch = prefetch_3dnow;
+
             c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
             c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;