Mercurial > libavcodec.hg
changeset 12035:09705b027344 libavcodec
Fix h264/vp8 intra pred on Athlon XP
Whose idea was it to have a CPU that didn't SIGILL on an invalid instruction?
author | darkshikari |
---|---|
date | Thu, 01 Jul 2010 10:29:47 +0000 |
parents | f6ae68a7b1fd |
children | 93a22c0fe8fe |
files | x86/h264_intrapred.asm x86/h264dsp_mmx.c |
diffstat | 2 files changed, 12 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/x86/h264_intrapred.asm Thu Jul 01 05:17:02 2010 +0000
+++ b/x86/h264_intrapred.asm Thu Jul 01 10:29:47 2010 +0000
@@ -115,7 +115,7 @@
 ; void pred16x16_dc(uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 
-%macro PRED16x16_DC 2
+%macro PRED16x16_DC 1
 cglobal pred16x16_dc_%1, 2,7
     mov r4, r0
     sub r0, r1
@@ -143,10 +143,6 @@
     movd m0, r2d
     punpcklbw m0, m0
     pshufw m0, m0, 0
-%elifidn %1, sse
-    imul r2d, 0x01010101
-    movd m0, r2d
-    shufps m0, m0, 0
 %elifidn %1, sse2
     movd m0, r2d
     punpcklbw m0, m0
@@ -161,18 +157,18 @@
 %if mmsize==8
     mov r3d, 8
 .loop:
-    %2 [r4+r1*0+0], m0
-    %2 [r4+r1*0+8], m0
-    %2 [r4+r1*1+0], m0
-    %2 [r4+r1*1+8], m0
+    mova [r4+r1*0+0], m0
+    mova [r4+r1*0+8], m0
+    mova [r4+r1*1+0], m0
+    mova [r4+r1*1+8], m0
 %else
     mov r3d, 4
 .loop:
-    %2 [r4+r1*0], m0
-    %2 [r4+r1*1], m0
+    mova [r4+r1*0], m0
+    mova [r4+r1*1], m0
     lea r4, [r4+r1*2]
-    %2 [r4+r1*0], m0
-    %2 [r4+r1*1], m0
+    mova [r4+r1*0], m0
+    mova [r4+r1*1], m0
 %endif
     lea r4, [r4+r1*2]
     dec r3d
@@ -181,11 +177,10 @@
 %endmacro
 
 INIT_MMX
-PRED16x16_DC mmxext, movq
+PRED16x16_DC mmxext
 INIT_XMM
-PRED16x16_DC sse, movaps
-PRED16x16_DC sse2, movdqa
-PRED16x16_DC ssse3, movdqa
+PRED16x16_DC sse2
+PRED16x16_DC ssse3
 
 ;-----------------------------------------------------------------------------
 ; void pred16x16_tm_vp8(uint8_t *src, int stride)
```
```diff
--- a/x86/h264dsp_mmx.c Thu Jul 01 05:17:02 2010 +0000
+++ b/x86/h264dsp_mmx.c Thu Jul 01 10:29:47 2010 +0000
@@ -2329,7 +2329,6 @@
 void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride);
 void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride);
 void ff_pred16x16_dc_mmxext (uint8_t *src, int stride);
-void ff_pred16x16_dc_sse (uint8_t *src, int stride);
 void ff_pred16x16_dc_sse2 (uint8_t *src, int stride);
 void ff_pred16x16_dc_ssse3 (uint8_t *src, int stride);
 void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride);
@@ -2384,7 +2383,6 @@
 
     if (mm_flags & FF_MM_SSE) {
         h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse;
-        h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse;
     }
 
     if (mm_flags & FF_MM_SSE2) {
```