diff x86/vp8dsp-init.c @ 12054:b8f80fe02861 libavcodec

SSSE3 versions of width4 VP8 6-tap MC functions. Also make some small changes to saturation order of 4-tap SSSE3 MC to fix a non-bitexactness bug. Patch mostly by Eli Friedman <eli.friedman AT gmail DOT com>.
author darkshikari
date Fri, 02 Jul 2010 05:27:41 +0000
parents dc4feabd4dab
children 8527154f6e81
line wrap: on
line diff
--- a/x86/vp8dsp-init.c	Fri Jul 02 00:30:27 2010 +0000
+++ b/x86/vp8dsp-init.c	Fri Jul 02 05:27:41 2010 +0000
@@ -54,6 +54,18 @@
                                        uint8_t *src, int srcstride,
                                        int height, int mx, int my);
 
+extern void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, int dststride,
+                                       uint8_t *src, int srcstride,
+                                       int height, int mx, int my);
+extern void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, int dststride,
+                                       uint8_t *src, int srcstride,
+                                       int height, int mx, int my);
+extern void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, int dststride,
+                                       uint8_t *src, int srcstride,
+                                       int height, int mx, int my);
+extern void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, int dststride,
+                                       uint8_t *src, int srcstride,
+                                       int height, int mx, int my);
 extern void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, int dststride,
                                        uint8_t *src, int srcstride,
                                        int height, int mx, int my);
@@ -173,6 +185,11 @@
 HVTAPSSE2(6, 6, 8)
 HVTAPSSE2(6, 6, 16)
 
+HVTAP(ssse3, 16, 4, 4, 4, 8)
+HVTAP(ssse3, 16, 4, 6, 4, 8)
+HVTAP(ssse3, 16, 6, 4, 4, 8)
+HVTAP(ssse3, 16, 6, 6, 4, 8)
+
 #define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
 static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
     uint8_t *dst, int dststride, uint8_t *src, \
@@ -264,6 +281,7 @@
     if (mm_flags & FF_MM_SSSE3) {
         VP8_LUMA_MC_FUNC(0, 16, ssse3);
         VP8_MC_FUNC(1, 8, ssse3);
+        VP8_MC_FUNC(2, 4, ssse3);
         VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
         VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
     }