comparison libswscale/rgb2rgb_template.c @ 32137:b7d5e57af959

swscale: avoid reading prior to the source buffer in planar2x() MMX2
author ramiro
date Mon, 13 Sep 2010 14:25:18 +0000
parents 67f44db4fee9
children 355ca1a08ea2
comparison
legend: equal | deleted | inserted | replaced
32136:effad4f941ff 32137:b7d5e57af959
1771 for (y=1; y<srcHeight; y++) { 1771 for (y=1; y<srcHeight; y++) {
1772 #if HAVE_MMX2 || HAVE_AMD3DNOW 1772 #if HAVE_MMX2 || HAVE_AMD3DNOW
1773 const x86_reg mmxSize= srcWidth&~15; 1773 const x86_reg mmxSize= srcWidth&~15;
1774 __asm__ volatile( 1774 __asm__ volatile(
1775 "mov %4, %%"REG_a" \n\t" 1775 "mov %4, %%"REG_a" \n\t"
1776 "movq "MANGLE(mmx_ff)", %%mm0 \n\t"
1777 "movq (%0, %%"REG_a"), %%mm4 \n\t"
1778 "movq %%mm4, %%mm2 \n\t"
1779 "psllq $8, %%mm4 \n\t"
1780 "pand %%mm0, %%mm2 \n\t"
1781 "por %%mm2, %%mm4 \n\t"
1782 "movq (%1, %%"REG_a"), %%mm5 \n\t"
1783 "movq %%mm5, %%mm3 \n\t"
1784 "psllq $8, %%mm5 \n\t"
1785 "pand %%mm0, %%mm3 \n\t"
1786 "por %%mm3, %%mm5 \n\t"
1776 "1: \n\t" 1787 "1: \n\t"
1777 "movq (%0, %%"REG_a"), %%mm0 \n\t" 1788 "movq (%0, %%"REG_a"), %%mm0 \n\t"
1778 "movq (%1, %%"REG_a"), %%mm1 \n\t" 1789 "movq (%1, %%"REG_a"), %%mm1 \n\t"
1779 "movq 1(%0, %%"REG_a"), %%mm2 \n\t" 1790 "movq 1(%0, %%"REG_a"), %%mm2 \n\t"
1780 "movq 1(%1, %%"REG_a"), %%mm3 \n\t" 1791 "movq 1(%1, %%"REG_a"), %%mm3 \n\t"
1781 "movq -1(%0, %%"REG_a"), %%mm4 \n\t"
1782 "movq -1(%1, %%"REG_a"), %%mm5 \n\t"
1783 PAVGB" %%mm0, %%mm5 \n\t" 1792 PAVGB" %%mm0, %%mm5 \n\t"
1784 PAVGB" %%mm0, %%mm3 \n\t" 1793 PAVGB" %%mm0, %%mm3 \n\t"
1785 PAVGB" %%mm0, %%mm5 \n\t" 1794 PAVGB" %%mm0, %%mm5 \n\t"
1786 PAVGB" %%mm0, %%mm3 \n\t" 1795 PAVGB" %%mm0, %%mm3 \n\t"
1787 PAVGB" %%mm1, %%mm4 \n\t" 1796 PAVGB" %%mm1, %%mm4 \n\t"
1804 "movq %%mm7, 8(%2, %%"REG_a", 2) \n\t" 1813 "movq %%mm7, 8(%2, %%"REG_a", 2) \n\t"
1805 "movq %%mm4, (%3, %%"REG_a", 2) \n\t" 1814 "movq %%mm4, (%3, %%"REG_a", 2) \n\t"
1806 "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t" 1815 "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t"
1807 #endif 1816 #endif
1808 "add $8, %%"REG_a" \n\t" 1817 "add $8, %%"REG_a" \n\t"
1818 "movq -1(%0, %%"REG_a"), %%mm4 \n\t"
1819 "movq -1(%1, %%"REG_a"), %%mm5 \n\t"
1809 " js 1b \n\t" 1820 " js 1b \n\t"
1810 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), 1821 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
1811 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), 1822 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
1812 "g" (-mmxSize) 1823 "g" (-mmxSize)
1813 : "%"REG_a 1824 : "%"REG_a
1814 1825
1815 ); 1826 );
1816 #else 1827 #else
1817 const x86_reg mmxSize=1; 1828 const x86_reg mmxSize=1;
1818 #endif
1819 dst[0 ]= (3*src[0] + src[srcStride])>>2; 1829 dst[0 ]= (3*src[0] + src[srcStride])>>2;
1820 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; 1830 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
1831 #endif
1821 1832
1822 for (x=mmxSize-1; x<srcWidth-1; x++) { 1833 for (x=mmxSize-1; x<srcWidth-1; x++) {
1823 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; 1834 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
1824 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; 1835 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
1825 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; 1836 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;