changeset 12035:09705b027344 libavcodec

Fix h264/vp8 intra pred on Athlon XP. Whose idea was it to have a CPU that didn't SIGILL on an invalid instruction?
author darkshikari
date Thu, 01 Jul 2010 10:29:47 +0000
parents f6ae68a7b1fd
children 93a22c0fe8fe
files x86/h264_intrapred.asm x86/h264dsp_mmx.c
diffstat 2 files changed, 12 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/x86/h264_intrapred.asm	Thu Jul 01 05:17:02 2010 +0000
+++ b/x86/h264_intrapred.asm	Thu Jul 01 10:29:47 2010 +0000
@@ -115,7 +115,7 @@
 ; void pred16x16_dc(uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 
-%macro PRED16x16_DC 2
+%macro PRED16x16_DC 1
 cglobal pred16x16_dc_%1, 2,7
     mov       r4, r0
     sub       r0, r1
@@ -143,10 +143,6 @@
     movd       m0, r2d
     punpcklbw  m0, m0
     pshufw     m0, m0, 0
-%elifidn %1, sse
-    imul      r2d, 0x01010101
-    movd       m0, r2d
-    shufps     m0, m0, 0
 %elifidn %1, sse2
     movd       m0, r2d
     punpcklbw  m0, m0
@@ -161,18 +157,18 @@
 %if mmsize==8
     mov       r3d, 8
 .loop:
-    %2 [r4+r1*0+0], m0
-    %2 [r4+r1*0+8], m0
-    %2 [r4+r1*1+0], m0
-    %2 [r4+r1*1+8], m0
+    mova [r4+r1*0+0], m0
+    mova [r4+r1*0+8], m0
+    mova [r4+r1*1+0], m0
+    mova [r4+r1*1+8], m0
 %else
     mov       r3d, 4
 .loop:
-    %2 [r4+r1*0], m0
-    %2 [r4+r1*1], m0
+    mova [r4+r1*0], m0
+    mova [r4+r1*1], m0
     lea   r4, [r4+r1*2]
-    %2 [r4+r1*0], m0
-    %2 [r4+r1*1], m0
+    mova [r4+r1*0], m0
+    mova [r4+r1*1], m0
 %endif
     lea   r4, [r4+r1*2]
     dec   r3d
@@ -181,11 +177,10 @@
 %endmacro
 
 INIT_MMX
-PRED16x16_DC mmxext, movq
+PRED16x16_DC mmxext
 INIT_XMM
-PRED16x16_DC    sse, movaps
-PRED16x16_DC   sse2, movdqa
-PRED16x16_DC  ssse3, movdqa
+PRED16x16_DC   sse2
+PRED16x16_DC  ssse3
 
 ;-----------------------------------------------------------------------------
 ; void pred16x16_tm_vp8(uint8_t *src, int stride)
--- a/x86/h264dsp_mmx.c	Thu Jul 01 05:17:02 2010 +0000
+++ b/x86/h264dsp_mmx.c	Thu Jul 01 10:29:47 2010 +0000
@@ -2329,7 +2329,6 @@
 void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride);
 void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride);
 void ff_pred16x16_dc_mmxext        (uint8_t *src, int stride);
-void ff_pred16x16_dc_sse           (uint8_t *src, int stride);
 void ff_pred16x16_dc_sse2          (uint8_t *src, int stride);
 void ff_pred16x16_dc_ssse3         (uint8_t *src, int stride);
 void ff_pred16x16_tm_vp8_mmx       (uint8_t *src, int stride);
@@ -2384,7 +2383,6 @@
 
     if (mm_flags & FF_MM_SSE) {
         h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse;
-        h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_sse;
     }
 
     if (mm_flags & FF_MM_SSE2) {