Mercurial > libavcodec.hg
diff i386/dsputil_mmx_avg.h @ 2209:c4a476971abc libavcodec
h264 luma motion compensation in mmx2/3dnow
author | michael |
---|---|
date | Tue, 07 Sep 2004 01:48:45 +0000 |
parents | 22b768f1261a |
children | 15cfba1b97b5 |
line wrap: on
line diff
--- a/i386/dsputil_mmx_avg.h Mon Sep 06 10:32:47 2004 +0000
+++ b/i386/dsputil_mmx_avg.h Tue Sep 07 01:48:45 2004 +0000
@@ -53,6 +53,53 @@
 :"%eax", "memory");
 }
 
+static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{ /* For h rows: dst row = PAVGB(4-byte src1 row, 4-byte src2 row). src1 steps by src1Stride, src2 by 4 bytes, dst by dstStride. */
+    __asm __volatile( /* operands: %0=h %1=src1 %2=src2 %3=dst %4=src1Stride %5=dstStride; PAVGB is a macro defined outside this view (presumably the packed byte-average instruction) */
+        "testl $1, %0 \n\t" /* is h odd? */
+        " jz 1f \n\t" /* even h: skip the single-row prologue */
+        "movd (%1), %%mm0 \n\t" /* mm0 = 4 bytes of the current src1 row */
+        "movd (%2), %%mm1 \n\t" /* mm1 = 4 bytes of the current src2 row */
+        "addl %4, %1 \n\t" /* src1 += src1Stride */
+        "addl $4, %2 \n\t" /* src2 += 4 (src2 rows are read back to back) */
+        PAVGB" %%mm1, %%mm0 \n\t" /* mm0 = PAVGB(mm0, mm1) */
+        "movd %%mm0, (%3) \n\t" /* store 4 bytes to the dst row */
+        "addl %5, %3 \n\t" /* dst += dstStride */
+        "decl %0 \n\t" /* one row done; h is now even */
+        "1: \n\t" /* main loop: four rows per pass */
+        "movd (%1), %%mm0 \n\t" /* mm0 = src1 row 0 */
+        "addl %4, %1 \n\t"
+        "movd (%1), %%mm1 \n\t" /* mm1 = src1 row 1 */
+        "addl %4, %1 \n\t"
+        PAVGB" (%2), %%mm0 \n\t" /* combine row 0 with src2 bytes 0..3; NOTE(review): a memory-operand PAVGB may read 8 bytes although only 4 are stored - confirm against the PAVGB definition */
+        PAVGB" 4(%2), %%mm1 \n\t" /* combine row 1 with src2 bytes 4..7 */
+        "movd %%mm0, (%3) \n\t" /* store row 0 */
+        "addl %5, %3 \n\t"
+        "movd %%mm1, (%3) \n\t" /* store row 1 */
+        "addl %5, %3 \n\t"
+        "movd (%1), %%mm0 \n\t" /* mm0 = src1 row 2 */
+        "addl %4, %1 \n\t"
+        "movd (%1), %%mm1 \n\t" /* mm1 = src1 row 3 */
+        "addl %4, %1 \n\t"
+        PAVGB" 8(%2), %%mm0 \n\t" /* combine row 2 with src2 bytes 8..11 */
+        PAVGB" 12(%2), %%mm1 \n\t" /* combine row 3 with src2 bytes 12..15 */
+        "movd %%mm0, (%3) \n\t" /* store row 2 */
+        "addl %5, %3 \n\t"
+        "movd %%mm1, (%3) \n\t" /* store row 3 */
+        "addl %5, %3 \n\t"
+        "addl $16, %2 \n\t" /* src2 += 16: four 4-byte rows consumed */
+        "subl $4, %0 \n\t" /* h -= 4 */
+        "jnz 1b \n\t" /* loop until h == 0; NOTE(review): exits only if h hits exactly 0, i.e. h is 4k or 4k+1 with k >= 1 - confirm callers */
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) /* PIC build: h is a read-write memory operand rather than ebx (presumably because ebx is reserved under PIC - confirm) */
+#else
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) /* non-PIC build: h in ebx; src1/src2/dst in eax/ecx/edx, all read-write */
+#endif
+        :"S"(src1Stride), "D"(dstStride) /* read-only inputs: strides in esi and edi */
+        :"memory"); /* the asm stores through dst, so memory is declared clobbered */
+}
+
+
 static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 {
     __asm __volatile(
@@ -173,6 +220,58 @@
 :"memory");*/
 }
 
+static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{ /* For h rows: dst row = PAVGB(PAVGB(4-byte src1 row, 4-byte src2 row), existing dst row). src1 steps by src1Stride, src2 by 4 bytes, dst by dstStride. */
+    __asm __volatile( /* operands: %0=h %1=src1 %2=src2 %3=dst %4=src1Stride %5=dstStride; PAVGB is a macro defined outside this view (presumably the packed byte-average instruction) */
+        "testl $1, %0 \n\t" /* is h odd? */
+        " jz 1f \n\t" /* even h: skip the single-row prologue */
+        "movd (%1), %%mm0 \n\t" /* mm0 = 4 bytes of the current src1 row */
+        "movd (%2), %%mm1 \n\t" /* mm1 = 4 bytes of the current src2 row */
+        "addl %4, %1 \n\t" /* src1 += src1Stride */
+        "addl $4, %2 \n\t" /* src2 += 4 (src2 rows are read back to back) */
+        PAVGB" %%mm1, %%mm0 \n\t" /* mm0 = PAVGB(mm0, mm1) */
+        PAVGB" (%3), %%mm0 \n\t" /* fold in what dst already holds; NOTE(review): a memory-operand PAVGB may read 8 bytes of dst although only 4 are stored - confirm against the PAVGB definition */
+        "movd %%mm0, (%3) \n\t" /* store 4 bytes to the dst row */
+        "addl %5, %3 \n\t" /* dst += dstStride */
+        "decl %0 \n\t" /* one row done; h is now even */
+        "1: \n\t" /* main loop: four rows per pass */
+        "movd (%1), %%mm0 \n\t" /* mm0 = src1 row 0 */
+        "addl %4, %1 \n\t"
+        "movd (%1), %%mm1 \n\t" /* mm1 = src1 row 1 */
+        "addl %4, %1 \n\t"
+        PAVGB" (%2), %%mm0 \n\t" /* combine row 0 with src2 bytes 0..3 */
+        PAVGB" 4(%2), %%mm1 \n\t" /* combine row 1 with src2 bytes 4..7 */
+        PAVGB" (%3), %%mm0 \n\t" /* fold existing dst row 0 into mm0 */
+        "movd %%mm0, (%3) \n\t" /* store row 0 */
+        "addl %5, %3 \n\t"
+        PAVGB" (%3), %%mm1 \n\t" /* fold existing dst row 1 into mm1 */
+        "movd %%mm1, (%3) \n\t" /* store row 1 */
+        "addl %5, %3 \n\t"
+        "movd (%1), %%mm0 \n\t" /* mm0 = src1 row 2 */
+        "addl %4, %1 \n\t"
+        "movd (%1), %%mm1 \n\t" /* mm1 = src1 row 3 */
+        "addl %4, %1 \n\t"
+        PAVGB" 8(%2), %%mm0 \n\t" /* combine row 2 with src2 bytes 8..11 */
+        PAVGB" 12(%2), %%mm1 \n\t" /* combine row 3 with src2 bytes 12..15 */
+        PAVGB" (%3), %%mm0 \n\t" /* fold existing dst row 2 into mm0 */
+        "movd %%mm0, (%3) \n\t" /* store row 2 */
+        "addl %5, %3 \n\t"
+        PAVGB" (%3), %%mm1 \n\t" /* fold existing dst row 3 into mm1 */
+        "movd %%mm1, (%3) \n\t" /* store row 3 */
+        "addl %5, %3 \n\t"
+        "addl $16, %2 \n\t" /* src2 += 16: four 4-byte rows consumed */
+        "subl $4, %0 \n\t" /* h -= 4 */
+        "jnz 1b \n\t" /* loop until h == 0; NOTE(review): exits only if h hits exactly 0, i.e. h is 4k or 4k+1 with k >= 1 - confirm callers */
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) /* PIC build: h is a read-write memory operand rather than ebx (presumably because ebx is reserved under PIC - confirm) */
+#else
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) /* non-PIC build: h in ebx; src1/src2/dst in eax/ecx/edx, all read-write */
+#endif
+        :"S"(src1Stride), "D"(dstStride) /* read-only inputs: strides in esi and edi */
+        :"memory"); /* the asm reads and stores through dst, so memory is declared clobbered */
+}
+
+
 static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 {
     __asm __volatile(