# HG changeset patch # User darkshikari # Date 1277980187 0 # Node ID 09705b027344dae80689f8f595fa52cb31c60065 # Parent f6ae68a7b1fd54fab5cb08878ec775342f0adf48 Fix h264/vp8 intra pred on Athlon XP Whose idea was it to have a CPU that didn't SIGILL on an invalid instruction? diff -r f6ae68a7b1fd -r 09705b027344 x86/h264_intrapred.asm --- a/x86/h264_intrapred.asm Thu Jul 01 05:17:02 2010 +0000 +++ b/x86/h264_intrapred.asm Thu Jul 01 10:29:47 2010 +0000 @@ -115,7 +115,7 @@ ; void pred16x16_dc(uint8_t *src, int stride) ;----------------------------------------------------------------------------- -%macro PRED16x16_DC 2 +%macro PRED16x16_DC 1 cglobal pred16x16_dc_%1, 2,7 mov r4, r0 sub r0, r1 @@ -143,10 +143,6 @@ movd m0, r2d punpcklbw m0, m0 pshufw m0, m0, 0 -%elifidn %1, sse - imul r2d, 0x01010101 - movd m0, r2d - shufps m0, m0, 0 %elifidn %1, sse2 movd m0, r2d punpcklbw m0, m0 @@ -161,18 +157,18 @@ %if mmsize==8 mov r3d, 8 .loop: - %2 [r4+r1*0+0], m0 - %2 [r4+r1*0+8], m0 - %2 [r4+r1*1+0], m0 - %2 [r4+r1*1+8], m0 + mova [r4+r1*0+0], m0 + mova [r4+r1*0+8], m0 + mova [r4+r1*1+0], m0 + mova [r4+r1*1+8], m0 %else mov r3d, 4 .loop: - %2 [r4+r1*0], m0 - %2 [r4+r1*1], m0 + mova [r4+r1*0], m0 + mova [r4+r1*1], m0 lea r4, [r4+r1*2] - %2 [r4+r1*0], m0 - %2 [r4+r1*1], m0 + mova [r4+r1*0], m0 + mova [r4+r1*1], m0 %endif lea r4, [r4+r1*2] dec r3d @@ -181,11 +177,10 @@ %endmacro INIT_MMX -PRED16x16_DC mmxext, movq +PRED16x16_DC mmxext INIT_XMM -PRED16x16_DC sse, movaps -PRED16x16_DC sse2, movdqa -PRED16x16_DC ssse3, movdqa +PRED16x16_DC sse2 +PRED16x16_DC ssse3 ;----------------------------------------------------------------------------- ; void pred16x16_tm_vp8(uint8_t *src, int stride) diff -r f6ae68a7b1fd -r 09705b027344 x86/h264dsp_mmx.c --- a/x86/h264dsp_mmx.c Thu Jul 01 05:17:02 2010 +0000 +++ b/x86/h264dsp_mmx.c Thu Jul 01 10:29:47 2010 +0000 @@ -2329,7 +2329,6 @@ void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride); void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride); void ff_pred16x16_dc_mmxext (uint8_t *src, int stride); -void ff_pred16x16_dc_sse (uint8_t *src, int stride); void ff_pred16x16_dc_sse2 (uint8_t *src, int stride); void ff_pred16x16_dc_ssse3 (uint8_t *src, int stride); void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride); @@ -2384,7 +2383,6 @@ if (mm_flags & FF_MM_SSE) { h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse; - h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse; } if (mm_flags & FF_MM_SSE2) {