diff i386/dsputil_mmx.c @ 1527:8ffd0c00e6df libavcodec

mmx2 optimization of huffyuv median encoding
author michael
date Mon, 13 Oct 2003 17:27:30 +0000
parents 7d328fd9d8a5
children 3b31998fe22f
line wrap: on
line diff
--- a/i386/dsputil_mmx.c	Mon Oct 13 14:37:04 2003 +0000
+++ b/i386/dsputil_mmx.c	Mon Oct 13 17:27:30 2003 +0000
@@ -583,6 +583,43 @@
     for(; i<w; i++)
         dst[i+0] = src1[i+0]-src2[i+0];
 }
+
+static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
+    int i=0;
+    uint8_t l, lt;
+
+    asm volatile(
+        "1:				\n\t"
+        "movq  -1(%1, %0), %%mm0	\n\t" // LT
+        "movq  (%1, %0), %%mm1		\n\t" // T
+        "movq  -1(%2, %0), %%mm2	\n\t" // L
+        "movq  (%2, %0), %%mm3		\n\t" // X
+        "movq %%mm2, %%mm4		\n\t" // L
+        "psubb %%mm0, %%mm2		\n\t"
+        "paddb %%mm1, %%mm2		\n\t" // L + T - LT
+        "movq %%mm4, %%mm5		\n\t" // L
+        "pmaxub %%mm1, %%mm4		\n\t" // max(T, L)
+        "pminub %%mm5, %%mm1		\n\t" // min(T, L)
+        "pminub %%mm2, %%mm4		\n\t" 
+        "pmaxub %%mm1, %%mm4		\n\t"
+        "psubb %%mm4, %%mm3		\n\t" // dst - pred
+        "movq %%mm3, (%3, %0)		\n\t"
+        "addl $8, %0			\n\t"
+        "cmpl %4, %0			\n\t"
+        " jb 1b				\n\t"
+        : "+r" (i)
+        : "r"(src1), "r"(src2), "r"(dst), "r"(w)
+    );
+
+    l= *left;
+    lt= *left_top;
+    
+    dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF);
+    
+    *left_top= src1[w-1];
+    *left    = src2[w-1];
+}
+
 #define LBUTTERFLY2(a1,b1,a2,b2)\
     "paddw " #b1 ", " #a1 "		\n\t"\
     "paddw " #b2 ", " #a2 "		\n\t"\
@@ -1699,6 +1736,8 @@
             SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_mmx2)
             SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2)
 #endif
+
+            c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
         } else if (mm_flags & MM_3DNOW) {
             c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
             c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;