Mercurial > mplayer.hg
changeset 32137:b7d5e57af959
swscale: avoid reading prior to the source buffer in planar2x() MMX2
author | ramiro |
---|---|
date | Mon, 13 Sep 2010 14:25:18 +0000 |
parents | effad4f941ff |
children | 355ca1a08ea2 |
files | libswscale/rgb2rgb.c libswscale/rgb2rgb_template.c |
diffstat | 2 files changed, 15 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/libswscale/rgb2rgb.c Mon Sep 13 12:14:43 2010 +0000
+++ b/libswscale/rgb2rgb.c Mon Sep 13 14:25:18 2010 +0000
@@ -99,6 +99,7 @@
 
 
 #if ARCH_X86
+DECLARE_ASM_CONST(8, uint64_t, mmx_ff) = 0x00000000000000FFULL;
 DECLARE_ASM_CONST(8, uint64_t, mmx_null) = 0x0000000000000000ULL;
 DECLARE_ASM_CONST(8, uint64_t, mmx_one) = 0xFFFFFFFFFFFFFFFFULL;
 DECLARE_ASM_CONST(8, uint64_t, mask32b) = 0x000000FF000000FFULL;
```
```diff
--- a/libswscale/rgb2rgb_template.c Mon Sep 13 12:14:43 2010 +0000
+++ b/libswscale/rgb2rgb_template.c Mon Sep 13 14:25:18 2010 +0000
@@ -1773,13 +1773,22 @@
  const x86_reg mmxSize= srcWidth&~15;
  __asm__ volatile(
  "mov %4, %%"REG_a" \n\t"
+ "movq "MANGLE(mmx_ff)", %%mm0 \n\t"
+ "movq (%0, %%"REG_a"), %%mm4 \n\t"
+ "movq %%mm4, %%mm2 \n\t"
+ "psllq $8, %%mm4 \n\t"
+ "pand %%mm0, %%mm2 \n\t"
+ "por %%mm2, %%mm4 \n\t"
+ "movq (%1, %%"REG_a"), %%mm5 \n\t"
+ "movq %%mm5, %%mm3 \n\t"
+ "psllq $8, %%mm5 \n\t"
+ "pand %%mm0, %%mm3 \n\t"
+ "por %%mm3, %%mm5 \n\t"
  "1: \n\t"
  "movq (%0, %%"REG_a"), %%mm0 \n\t"
  "movq (%1, %%"REG_a"), %%mm1 \n\t"
  "movq 1(%0, %%"REG_a"), %%mm2 \n\t"
  "movq 1(%1, %%"REG_a"), %%mm3 \n\t"
- "movq -1(%0, %%"REG_a"), %%mm4 \n\t"
- "movq -1(%1, %%"REG_a"), %%mm5 \n\t"
  PAVGB" %%mm0, %%mm5 \n\t"
  PAVGB" %%mm0, %%mm3 \n\t"
  PAVGB" %%mm0, %%mm5 \n\t"
@@ -1806,6 +1815,8 @@
  "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t"
 #endif
  "add $8, %%"REG_a" \n\t"
+ "movq -1(%0, %%"REG_a"), %%mm4 \n\t"
+ "movq -1(%1, %%"REG_a"), %%mm5 \n\t"
  " js 1b \n\t"
  :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
  "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
@@ -1815,9 +1826,9 @@
  );
 #else
  const x86_reg mmxSize=1;
-#endif
  dst[0 ]= (3*src[0] + src[srcStride])>>2;
  dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
+#endif
 
  for (x=mmxSize-1; x<srcWidth-1; x++) {
  dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
```