diff x86/vp8dsp-init.c @ 12210:baf13deed97e libavcodec

Various VP8 x86 deblocking speedups: add SSSE3 versions and improve the SSE2 versions a bit. The SSE2/SSSE3 mbedge h functions are currently broken, so explicitly disable them.
author darkshikari
date Wed, 21 Jul 2010 22:11:03 +0000
parents 9eef00a43280
children 657d353cd515
line wrap: on
line diff
--- a/x86/vp8dsp-init.c	Wed Jul 21 20:51:01 2010 +0000
+++ b/x86/vp8dsp-init.c	Wed Jul 21 22:11:03 2010 +0000
@@ -223,64 +223,31 @@
 extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
 extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
 
-extern void ff_vp8_v_loop_filter_simple_mmx   (uint8_t *dst, int stride, int flim);
-extern void ff_vp8_v_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim);
-extern void ff_vp8_v_loop_filter_simple_sse2  (uint8_t *dst, int stride, int flim);
-extern void ff_vp8_h_loop_filter_simple_mmx   (uint8_t *dst, int stride, int flim);
-extern void ff_vp8_h_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim);
-extern void ff_vp8_h_loop_filter_simple_sse2  (uint8_t *dst, int stride, int flim);
-
-extern void ff_vp8_v_loop_filter16y_inner_mmx   (uint8_t *dst, int stride,
-                                                 int e, int i, int hvt);
-extern void ff_vp8_v_loop_filter16y_inner_mmxext(uint8_t *dst, int stride,
-                                                 int e, int i, int hvt);
-extern void ff_vp8_v_loop_filter16y_inner_sse2  (uint8_t *dst, int stride,
-                                                 int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter16y_inner_mmx   (uint8_t *dst, int stride,
-                                                 int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride,
-                                                 int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter16y_inner_sse2  (uint8_t *dst, int stride,
-                                                 int e, int i, int hvt);
+#define DECLARE_LOOP_FILTER(NAME)\
+extern void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\
+extern void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\
+extern void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\
+                                                    int e, int i, int hvt);\
+extern void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\
+                                                    int e, int i, int hvt);\
+extern void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\
+                                                    int s, int e, int i, int hvt);\
+extern void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\
+                                                    int s, int e, int i, int hvt);\
+extern void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\
+                                                    int e, int i, int hvt);\
+extern void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\
+                                                    int e, int i, int hvt);\
+extern void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\
+                                                    int s, int e, int i, int hvt);\
+extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\
+                                                    int s, int e, int i, int hvt);
 
-extern void ff_vp8_v_loop_filter8uv_inner_mmx   (uint8_t *dstU, uint8_t *dstV,
-                                                 int s, int e, int i, int hvt);
-extern void ff_vp8_v_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV,
-                                                 int s, int e, int i, int hvt);
-extern void ff_vp8_v_loop_filter8uv_inner_sse2  (uint8_t *dstU, uint8_t *dstV,
-                                                 int s, int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter8uv_inner_mmx   (uint8_t *dstU, uint8_t *dstV,
-                                                 int s, int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV,
-                                                 int s, int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter8uv_inner_sse2  (uint8_t *dstU, uint8_t *dstV,
-                                                 int s, int e, int i, int hvt);
+DECLARE_LOOP_FILTER(mmx)
+DECLARE_LOOP_FILTER(mmxext)
+DECLARE_LOOP_FILTER(sse2)
+DECLARE_LOOP_FILTER(ssse3)
 
-extern void ff_vp8_v_loop_filter16y_mbedge_mmx   (uint8_t *dst, int stride,
-                                                  int e, int i, int hvt);
-extern void ff_vp8_v_loop_filter16y_mbedge_mmxext(uint8_t *dst, int stride,
-                                                  int e, int i, int hvt);
-extern void ff_vp8_v_loop_filter16y_mbedge_sse2  (uint8_t *dst, int stride,
-                                                  int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter16y_mbedge_mmx   (uint8_t *dst, int stride,
-                                                  int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter16y_mbedge_mmxext(uint8_t *dst, int stride,
-                                                  int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter16y_mbedge_sse2  (uint8_t *dst, int stride,
-                                                  int e, int i, int hvt);
-
-extern void ff_vp8_v_loop_filter8uv_mbedge_mmx   (uint8_t *dstU, uint8_t *dstV,
-                                                  int s, int e, int i, int hvt);
-extern void ff_vp8_v_loop_filter8uv_mbedge_mmxext(uint8_t *dstU, uint8_t *dstV,
-                                                  int s, int e, int i, int hvt);
-extern void ff_vp8_v_loop_filter8uv_mbedge_sse2  (uint8_t *dstU, uint8_t *dstV,
-                                                  int s, int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter8uv_mbedge_mmx   (uint8_t *dstU, uint8_t *dstV,
-                                                  int s, int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter8uv_mbedge_mmxext(uint8_t *dstU, uint8_t *dstV,
-                                                  int s, int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter8uv_mbedge_sse2  (uint8_t *dstU, uint8_t *dstV,
-                                                  int s, int e, int i, int hvt);
 #endif
 
 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
@@ -384,8 +351,8 @@
         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
 
-        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmxext;
-        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
+        //c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse2;
+        //c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse2;
     }
 
     if (mm_flags & FF_MM_SSSE3) {
@@ -395,6 +362,19 @@
         VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
         VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
         VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
+
+        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
+        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
+
+        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
+        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
+        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
+        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
+
+        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_ssse3;
+        //c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_ssse3;
+        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
+        //c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
     }
 
     if (mm_flags & FF_MM_SSE4) {