changeset 12082:8527154f6e81 libavcodec

SSSE3 versions of vp8 width4 bilinear MC functions
author darkshikari
date Sat, 03 Jul 2010 00:48:12 +0000
parents 812e23197d64
children dd5efc28bca9
files x86/vp8dsp-init.c x86/vp8dsp.asm
diffstat 2 files changed, 35 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/x86/vp8dsp-init.c	Fri Jul 02 21:04:45 2010 +0000
+++ b/x86/vp8dsp-init.c	Sat Jul 03 00:48:12 2010 +0000
@@ -85,6 +85,12 @@
 extern void ff_put_vp8_bilinear8_h_sse2  (uint8_t *dst, int dststride,
                                           uint8_t *src, int srcstride,
                                           int height, int mx, int my);
+extern void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, int dststride,
+                                          uint8_t *src, int srcstride,
+                                          int height, int mx, int my);
+extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride,
+                                          uint8_t *src, int srcstride,
+                                          int height, int mx, int my);
 
 extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride,
                                           uint8_t *src, int srcstride,
@@ -92,12 +98,13 @@
 extern void ff_put_vp8_bilinear8_v_sse2  (uint8_t *dst, int dststride,
                                           uint8_t *src, int srcstride,
                                           int height, int mx, int my);
+extern void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, int dststride,
+                                          uint8_t *src, int srcstride,
+                                          int height, int mx, int my);
 extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride,
                                           uint8_t *src, int srcstride,
                                           int height, int mx, int my);
-extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride,
-                                          uint8_t *src, int srcstride,
-                                          int height, int mx, int my);
+
 
 extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, int dststride,
                                     uint8_t *src, int srcstride,
@@ -207,6 +214,7 @@
 HVBILIN(mmxext, 8, 16, 16)
 HVBILIN(sse2,   8,  8, 16)
 HVBILIN(sse2,   8, 16, 16)
+HVBILIN(ssse3,  8,  4,  8)
 HVBILIN(ssse3,  8,  8, 16)
 HVBILIN(ssse3,  8, 16, 16)
 
@@ -284,6 +292,7 @@
         VP8_MC_FUNC(2, 4, ssse3);
         VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
         VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
+        VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
     }
 
     if (mm_flags & FF_MM_SSE4) {
--- a/x86/vp8dsp.asm	Fri Jul 02 21:04:45 2010 +0000
+++ b/x86/vp8dsp.asm	Sat Jul 03 00:48:12 2010 +0000
@@ -770,7 +770,8 @@
 INIT_XMM
 FILTER_BILINEAR   sse2, 8, 7
 
-cglobal put_vp8_bilinear8_v_ssse3, 7,7,5
+%macro FILTER_BILINEAR_SSSE3 1
+cglobal put_vp8_bilinear%1_v_ssse3, 7,7
     shl      r6d, 4
 %ifdef PIC
     lea      r11, [bilinear_filter_vb_m]
@@ -789,9 +790,16 @@
     psraw     m1, 2
     pavgw     m0, m4
     pavgw     m1, m4
+%if mmsize==8
+    packuswb  m0, m0
+    packuswb  m1, m1
+    movh [r0+r1*0], m0
+    movh [r0+r1*1], m1
+%else
     packuswb  m0, m1
     movh   [r0+r1*0], m0
     movhps [r0+r1*1], m0
+%endif
 
     lea       r0, [r0+r1*2]
     lea       r2, [r2+r3*2]
@@ -799,7 +807,7 @@
     jg .nextrow
     REP_RET
 
-cglobal put_vp8_bilinear8_h_ssse3, 7,7,5
+cglobal put_vp8_bilinear%1_h_ssse3, 7,7
     shl      r5d, 4
 %ifdef PIC
     lea      r11, [bilinear_filter_vb_m]
@@ -818,15 +826,28 @@
     psraw     m1, 2
     pavgw     m0, m4
     pavgw     m1, m4
+%if mmsize==8
+    packuswb  m0, m0
+    packuswb  m1, m1
+    movh [r0+r1*0], m0
+    movh [r0+r1*1], m1
+%else
     packuswb  m0, m1
     movh   [r0+r1*0], m0
     movhps [r0+r1*1], m0
+%endif
 
     lea       r0, [r0+r1*2]
     lea       r2, [r2+r3*2]
     sub       r4, 2
     jg .nextrow
     REP_RET
+%endmacro
+
+INIT_MMX
+FILTER_BILINEAR_SSSE3 4
+INIT_XMM
+FILTER_BILINEAR_SSSE3 8
 
 cglobal put_vp8_pixels8_mmx, 5,5
 .nextrow: