changeset 3094:385fa360f618 libavcodec

11% faster put_h264_qpel16_v_lowpass_mmx2
author lorenm
date Tue, 07 Feb 2006 07:35:03 +0000
parents 19f3dd63af7d
children 66987f98caae
files i386/h264dsp_mmx.c
diffstat 1 files changed, 27 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/i386/h264dsp_mmx.c	Tue Feb 07 06:25:14 2006 +0000
+++ b/i386/h264dsp_mmx.c	Tue Feb 07 07:35:03 2006 +0000
@@ -591,11 +591,11 @@
     );\
 }\
 \
-static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
-    int h= 2;\
+static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    int w= 2;\
     src -= 2*srcStride;\
     \
-    while(h--){\
+    while(w--){\
       asm volatile(\
         "pxor %%mm7, %%mm7          \n\t"\
         "movd (%0), %%mm0           \n\t"\
@@ -626,8 +626,24 @@
         : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
         : "memory"\
      );\
-     src += 4-13*srcStride;\
-     dst +=  4-8*dstStride;\
+     if(h==16){\
+        asm volatile(\
+            QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+            QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+            QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
+            QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
+            QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+            QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+            QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+            QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+            \
+           : "+a"(src), "+c"(dst)\
+           : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+           : "memory"\
+        );\
+     }\
+     src += 4-(h+5)*srcStride;\
+     dst += 4-h*dstStride;\
    }\
 }\
 static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
@@ -731,13 +747,13 @@
     dst += 8 - size*dstStride;\
     }while(w--);\
 }\
+\
+static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
+}\
 static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
-    OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
-    OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
-    src += 8*srcStride;\
-    dst += 8*dstStride;\
-    OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
-    OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+    OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
+    OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
 }\
 \
 static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\