changeset 10955:fdddf3d4238f libavcodec
Use two separate memory arguments since 8+() is invalid gas syntax
| author | conrad |
|---|---|
| date | Thu, 21 Jan 2010 09:46:57 +0000 |
| parents | d7ef6611a49e |
| children | 16db91dd49eb |
| files | x86/h264dsp_mmx.c |
| diffstat | 1 files changed, 19 insertions(+), 19 deletions(-) |
```diff
--- a/x86/h264dsp_mmx.c	Wed Jan 20 22:55:50 2010 +0000
+++ b/x86/h264dsp_mmx.c	Thu Jan 21 09:46:57 2010 +0000
@@ -617,7 +617,7 @@
         "pavgb %%mm2, "#tmp"      \n\t"\
         "pavgb "#tmp", "#q2"      \n\t" /* avg(p2,avg(p0,q0)) */\
         "pxor "q2addr", "#tmp"    \n\t"\
-        "pand %8, "#tmp"          \n\t" /* (p2^avg(p0,q0))&1 */\
+        "pand %9, "#tmp"          \n\t" /* (p2^avg(p0,q0))&1 */\
         "psubusb "#tmp", "#q2"    \n\t" /* (p2+((p0+q0+1)>>1))>>1 */\
         "movq "#p1", "#tmp"       \n\t"\
         "psubusb "#tc0", "#tmp"   \n\t"\
@@ -631,48 +631,48 @@
     DECLARE_ALIGNED_8(uint64_t, tmp0[2]);
 
     __asm__ volatile(
-        "movq    (%1,%3), %%mm0    \n\t" //p1
-        "movq    (%1,%3,2), %%mm1  \n\t" //p0
-        "movq    (%2),    %%mm2    \n\t" //q0
-        "movq    (%2,%3), %%mm3    \n\t" //q1
-        H264_DEBLOCK_MASK(%6, %7)
+        "movq    (%2,%4), %%mm0    \n\t" //p1
+        "movq    (%2,%4,2), %%mm1  \n\t" //p0
+        "movq    (%3),    %%mm2    \n\t" //q0
+        "movq    (%3,%4), %%mm3    \n\t" //q1
+        H264_DEBLOCK_MASK(%7, %8)
 
-        "movd      %5,    %%mm4    \n\t"
+        "movd      %6,    %%mm4    \n\t"
         "punpcklbw %%mm4, %%mm4    \n\t"
         "punpcklwd %%mm4, %%mm4    \n\t"
         "pcmpeqb %%mm3, %%mm3      \n\t"
         "movq    %%mm4, %%mm6      \n\t"
         "pcmpgtb %%mm3, %%mm4      \n\t"
-        "movq    %%mm6, 8+%0       \n\t"
+        "movq    %%mm6, %1         \n\t"
         "pand    %%mm4, %%mm7      \n\t"
         "movq    %%mm7, %0         \n\t"
 
         /* filter p1 */
-        "movq     (%1), %%mm3      \n\t" //p2
+        "movq     (%2), %%mm3      \n\t" //p2
         DIFF_GT2_MMX(%%mm1, %%mm3, %%mm5, %%mm6, %%mm4) // |p2-p0|>beta-1
         "pand     %%mm7, %%mm6     \n\t" // mask & |p2-p0|<beta
-        "pand     8+%0, %%mm7      \n\t" // mask & tc0
+        "pand     %1, %%mm7        \n\t" // mask & tc0
         "movq     %%mm7, %%mm4     \n\t"
         "psubb    %%mm6, %%mm7     \n\t"
         "pand     %%mm4, %%mm6     \n\t" // mask & |p2-p0|<beta & tc0
-        H264_DEBLOCK_Q1(%%mm0, %%mm3, "(%1)", "(%1,%3)", %%mm6, %%mm4)
+        H264_DEBLOCK_Q1(%%mm0, %%mm3, "(%2)", "(%2,%4)", %%mm6, %%mm4)
 
         /* filter q1 */
-        "movq    (%2,%3,2), %%mm4  \n\t" //q2
+        "movq    (%3,%4,2), %%mm4  \n\t" //q2
         DIFF_GT2_MMX(%%mm2, %%mm4, %%mm5, %%mm6, %%mm3) // |q2-q0|>beta-1
         "pand     %0, %%mm6        \n\t"
-        "movq     8+%0, %%mm5      \n\t" // can be merged with the and below but is slower then
+        "movq     %1, %%mm5        \n\t" // can be merged with the and below but is slower then
         "pand     %%mm6, %%mm5     \n\t"
         "psubb    %%mm6, %%mm7     \n\t"
-        "movq    (%2,%3), %%mm3    \n\t"
-        H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6)
+        "movq    (%3,%4), %%mm3    \n\t"
+        H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%3,%4,2)", "(%3,%4)", %%mm5, %%mm6)
 
         /* filter p0, q0 */
-        H264_DEBLOCK_P0_Q0(%8, unused)
-        "movq      %%mm1, (%1,%3,2) \n\t"
-        "movq      %%mm2, (%2)      \n\t"
+        H264_DEBLOCK_P0_Q0(%9, unused)
+        "movq      %%mm1, (%2,%4,2) \n\t"
+        "movq      %%mm2, (%3)      \n\t"
 
-        : "=m"(*tmp0)
+        : "=m"(tmp0[0]), "=m"(tmp0[1])
         : "r"(pix-3*stride), "r"(pix), "r"((x86_reg)stride),
           "m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1),
           "m"(ff_bone)
```
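For context, the sketch below is a minimal, standalone illustration of the constraint pattern this patch switches to (x86-64 GCC/Clang extended asm; the variable and file names are made up, this is not FFmpeg code). Instead of exposing the whole `tmp0` array as a single `"=m"` operand and addressing its second element by prepending `8+` to `%0` inside the asm template, each element is passed as its own memory operand, so the assembler always receives a complete effective address and never sees the `8+()` form the commit message calls invalid.

```c
/* Minimal sketch of the "two separate memory arguments" pattern
 * (x86-64, GCC/Clang extended asm); not FFmpeg code. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t tmp[2];

    __asm__ volatile(
        "movq $1, %0 \n\t"   /* store into tmp[0] through operand %0 */
        "movq $2, %1 \n\t"   /* store into tmp[1] through operand %1;
                              * no "8+%0" address arithmetic in the template */
        : "=m"(tmp[0]), "=m"(tmp[1]) /* one "m" constraint per array element */
    );

    printf("%llu %llu\n",
           (unsigned long long)tmp[0],
           (unsigned long long)tmp[1]);
    return 0;
}
```

Built with something like `gcc -O2 sketch.c` on x86-64, this prints `1 2`; the compiler substitutes a full address for both `%0` and `%1`, which is the point of the change.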