changeset 12266:48d6738904a9 libavcodec

Fix SPLATB_REG mess. It used to be an if/elseif/elseif/elseif spaghetti, so this splits it into small optimization-specific macros which are selected for each DSP function. The advantage of this approach is that the sse4 functions can now use the ssse3 codepath as well, without needing an explicit sse4 codepath.
author rbultje
date Sat, 24 Jul 2010 19:33:05 +0000
parents b120f1854e35
children 8de66be22d87
files x86/vp8dsp.asm
diffstat 1 files changed, 53 insertions(+), 34 deletions(-) [+]
line wrap: on
line diff
--- a/x86/vp8dsp.asm	Sat Jul 24 17:11:51 2010 +0000
+++ b/x86/vp8dsp.asm	Sat Jul 24 19:33:05 2010 +0000
@@ -1360,22 +1360,29 @@
     movd    [%7+%9*2], m%4
 %endmacro
 
-%macro SPLATB_REG 3-4
+%macro SPLATB_REG_MMX 2-3 ; splat low byte of %2 into all bytes of %1 (plain MMX); optional %3 is accepted but unused
     movd           %1, %2
-%ifidn %3, ssse3
-    pshufb         %1, %4
-%else
     punpcklbw      %1, %1
-%if mmsize == 16 ; sse2
+    punpcklwd      %1, %1 ; duplicate the doubled byte pair: low dword = 4 copies of the byte
+    punpckldq      %1, %1 ; duplicate the low dword: all 8 bytes now hold the byte
+%endmacro
+
+%macro SPLATB_REG_MMXEXT 2-3 ; same result as SPLATB_REG_MMX, using pshufw; optional %3 is accepted but unused
+    movd           %1, %2 ; low dword of %1 = %2
+    punpcklbw      %1, %1 ; low word = two copies of the low byte
+    pshufw         %1, %1, 0x0 ; selector 0: broadcast word 0 to all four words
+%endmacro
+
+%macro SPLATB_REG_SSE2 2-3 ; 16-byte variant: splat low byte of %2 into all bytes of %1; optional %3 is accepted but unused
+    movd           %1, %2 ; low dword of %1 = %2
+    punpcklbw      %1, %1 ; low word = two copies of the low byte
     pshuflw        %1, %1, 0x0
     punpcklqdq     %1, %1
-%elifidn %3, mmx
-    punpcklwd      %1, %1
-    punpckldq      %1, %1
-%else ; mmxext
-    pshufw         %1, %1, 0x0
-%endif
-%endif
+%endmacro
+
+%macro SPLATB_REG_SSSE3 3 ; splat low byte of %2 into all bytes of %1 via pshufb; %3 is the shuffle mask register
+    movd           %1, %2 ; low dword of %1 = %2
+    pshufb         %1, %3 ; %3 is expected to be all-zero (callers pxor it first), so every lane selects byte 0
 %endmacro
 
 %macro SIMPLE_LOOPFILTER 3
@@ -1387,10 +1394,10 @@
 %if mmsize == 8 ; mmx/mmxext
     mov            r3, 2
 %endif
-%ifidn %1, ssse3
+%ifnidn %1, sse2 && mmsize == 16
     pxor           m0, m0
 %endif
-    SPLATB_REG     m7, r2, %1, m0   ; splat "flim" into register
+    SPLATB_REG     m7, r2, m0       ; splat "flim" into register
 
     ; set up indexes to address 4 rows
     mov            r2, r1
@@ -1529,13 +1536,17 @@
 %endmacro
 
 INIT_MMX
+%define SPLATB_REG SPLATB_REG_MMX
 SIMPLE_LOOPFILTER mmx,    v, 4
 SIMPLE_LOOPFILTER mmx,    h, 6
+%define SPLATB_REG SPLATB_REG_MMXEXT
 SIMPLE_LOOPFILTER mmxext, v, 4
 SIMPLE_LOOPFILTER mmxext, h, 6
 INIT_XMM
+%define SPLATB_REG SPLATB_REG_SSE2
 SIMPLE_LOOPFILTER sse2,   v, 3
 SIMPLE_LOOPFILTER sse2,   h, 6
+%define SPLATB_REG SPLATB_REG_SSSE3
 SIMPLE_LOOPFILTER ssse3,  v, 3
 SIMPLE_LOOPFILTER ssse3,  h, 6
 
@@ -1573,15 +1584,15 @@
 %define stack_reg   hev_thr_reg
 %endif
 
-%ifidn %1, ssse3
+%ifnidn %1, sse2 && mmsize == 16
     pxor             m7, m7
 %endif
 
 %ifndef m8 ; mmx/mmxext or sse2 on x86-32
     ; splat function arguments
-    SPLATB_REG       m0, E_reg, %1, m7 ; E
-    SPLATB_REG       m1, I_reg, %1, m7 ; I
-    SPLATB_REG       m2, hev_thr_reg, %1, m7 ; hev_thresh
+    SPLATB_REG       m0, E_reg, m7   ; E
+    SPLATB_REG       m1, I_reg, m7   ; I
+    SPLATB_REG       m2, hev_thr_reg, m7 ; hev_thresh
 
     ; align stack
     mov       stack_reg, rsp         ; backup stack pointer
@@ -1614,9 +1625,9 @@
 %define q0backup m8
 
     ; splat function arguments
-    SPLATB_REG   flim_E, E_reg, %1, m7 ; E
-    SPLATB_REG   flim_I, I_reg, %1, m7 ; I
-    SPLATB_REG  hev_thr, hev_thr_reg, %1, m7 ; hev_thresh
+    SPLATB_REG   flim_E, E_reg, m7   ; E
+    SPLATB_REG   flim_I, I_reg, m7   ; I
+    SPLATB_REG  hev_thr, hev_thr_reg, m7 ; hev_thresh
 %endif
 
 %if mmsize == 8 && %4 == 16 ; mmx/mmxext
@@ -2028,17 +2039,20 @@
 %endmacro
 
 INIT_MMX
+%define SPLATB_REG SPLATB_REG_MMX
 INNER_LOOPFILTER mmx,    v, 6, 16, 0
 INNER_LOOPFILTER mmx,    h, 6, 16, 0
+INNER_LOOPFILTER mmx,    v, 6,  8, 0
+INNER_LOOPFILTER mmx,    h, 6,  8, 0
+
+%define SPLATB_REG SPLATB_REG_MMXEXT
 INNER_LOOPFILTER mmxext, v, 6, 16, 0
 INNER_LOOPFILTER mmxext, h, 6, 16, 0
-
-INNER_LOOPFILTER mmx,    v, 6,  8, 0
-INNER_LOOPFILTER mmx,    h, 6,  8, 0
 INNER_LOOPFILTER mmxext, v, 6,  8, 0
 INNER_LOOPFILTER mmxext, h, 6,  8, 0
 
 INIT_XMM
+%define SPLATB_REG SPLATB_REG_SSE2
 INNER_LOOPFILTER sse2,   v, 5, 16, 13
 %ifdef m8
 INNER_LOOPFILTER sse2,   h, 5, 16, 13
@@ -2048,6 +2062,7 @@
 INNER_LOOPFILTER sse2,   v, 6,  8, 13
 INNER_LOOPFILTER sse2,   h, 6,  8, 13
 
+%define SPLATB_REG SPLATB_REG_SSSE3
 INNER_LOOPFILTER ssse3,  v, 5, 16, 13
 %ifdef m8
 INNER_LOOPFILTER ssse3,  h, 5, 16, 13
@@ -2152,15 +2167,15 @@
 %define stack_reg   hev_thr_reg
 %endif
 
-%ifidn %1, ssse3
+%ifnidn %1, sse2 && mmsize == 16
     pxor             m7, m7
 %endif
 
 %ifndef m8 ; mmx/mmxext or sse2 on x86-32
     ; splat function arguments
-    SPLATB_REG       m0, E_reg, %1, m7 ; E
-    SPLATB_REG       m1, I_reg, %1, m7 ; I
-    SPLATB_REG       m2, hev_thr_reg, %1, m7 ; hev_thresh
+    SPLATB_REG       m0, E_reg, m7   ; E
+    SPLATB_REG       m1, I_reg, m7   ; I
+    SPLATB_REG       m2, hev_thr_reg, m7 ; hev_thresh
 
     ; align stack
     mov       stack_reg, rsp         ; backup stack pointer
@@ -2200,9 +2215,9 @@
 %define lim_sign m15
 
     ; splat function arguments
-    SPLATB_REG   flim_E, E_reg, %1, m7 ; E
-    SPLATB_REG   flim_I, I_reg, %1, m7 ; I
-    SPLATB_REG  hev_thr, hev_thr_reg, %1, m7 ; hev_thresh
+    SPLATB_REG   flim_E, E_reg, m7   ; E
+    SPLATB_REG   flim_I, I_reg, m7   ; I
+    SPLATB_REG  hev_thr, hev_thr_reg, m7 ; hev_thresh
 %endif
 
 %if mmsize == 8 && %4 == 16 ; mmx/mmxext
@@ -2696,17 +2711,20 @@
 %endmacro
 
 INIT_MMX
+%define SPLATB_REG SPLATB_REG_MMX
 MBEDGE_LOOPFILTER mmx,    v, 6, 16, 0
 MBEDGE_LOOPFILTER mmx,    h, 6, 16, 0
+MBEDGE_LOOPFILTER mmx,    v, 6,  8, 0
+MBEDGE_LOOPFILTER mmx,    h, 6,  8, 0
+
+%define SPLATB_REG SPLATB_REG_MMXEXT
 MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0
 MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0
-
-MBEDGE_LOOPFILTER mmx,    v, 6,  8, 0
-MBEDGE_LOOPFILTER mmx,    h, 6,  8, 0
 MBEDGE_LOOPFILTER mmxext, v, 6,  8, 0
 MBEDGE_LOOPFILTER mmxext, h, 6,  8, 0
 
 INIT_XMM
+%define SPLATB_REG SPLATB_REG_SSE2
 MBEDGE_LOOPFILTER sse2,   v, 5, 16, 16
 %ifdef m8
 MBEDGE_LOOPFILTER sse2,   h, 5, 16, 16
@@ -2716,6 +2734,7 @@
 MBEDGE_LOOPFILTER sse2,   v, 6,  8, 16
 MBEDGE_LOOPFILTER sse2,   h, 6,  8, 16
 
+%define SPLATB_REG SPLATB_REG_SSSE3
 MBEDGE_LOOPFILTER ssse3,  v, 5, 16, 16
 %ifdef m8
 MBEDGE_LOOPFILTER ssse3,  h, 5, 16, 16