Mercurial > libavcodec.hg
changeset 1647:c943c1d2d099 libavcodec
h263_v_loop_filter_mmx
author | michael |
---|---|
date | Tue, 02 Dec 2003 20:28:10 +0000 |
parents | c3c166ead03a |
children | de28264c3dc3 |
files | i386/dsputil_mmx.c |
diffstat | 1 files changed, 92 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/i386/dsputil_mmx.c Mon Dec 01 20:04:14 2003 +0000 +++ b/i386/dsputil_mmx.c Tue Dec 02 20:28:10 2003 +0000 @@ -22,6 +22,8 @@ #include "../dsputil.h" #include "../simple_idct.h" +extern const uint8_t ff_h263_loop_filter_strength[32]; + int mm_flags; /* multimedia extension flags */ /* pixel operations */ @@ -34,6 +36,8 @@ static const uint64_t ff_pw_16 __attribute__ ((aligned(8))) = 0x0010001000100010ULL; static const uint64_t ff_pw_15 __attribute__ ((aligned(8))) = 0x000F000F000F000FULL; +static const uint64_t ff_pb_FC __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL; + #define JUMPALIGN() __asm __volatile (".balign 8"::) #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) @@ -465,6 +469,92 @@ dst[i+0] += src[i+0]; } /* h263_v_loop_filter_mmx: MMX filter applied to four 8-byte rows, named here A = src - 2*stride, B = src - 1*stride, C = src + 0*stride, D = src + 1*stride. strength is read from ff_h263_loop_filter_strength[qscale]. The asm forms sum = (A - D) + 4*(C - B) per pixel in 16-bit words, abs(d) = abs(sum) >> 3, and d1 = MIN(abs(d), S(2*strength - abs(d))) where S is an unsigned saturating subtract. Rows B and C are rewritten as B + d1 and C - d1 (direction flipped where sum is negative) using unsigned saturating byte arithmetic. Rows A and D are rewritten as A - d2 and D + d2 with wrapping byte arithmetic, where d2 = MIN(abs(A - D), 2*d1) >> 2 with the sign of A - D restored, and A - D is first saturated to a signed byte. All of mm0-mm7 are overwritten; the asm statement declares no clobber list and the function does not execute emms. NOTE(review): qscale indexes a table declared with 32 entries and is not range-checked here; confirm callers only pass 0..31. */ +static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){ + const int strength= ff_h263_loop_filter_strength[qscale]; + + asm volatile( + "pxor %%mm7, %%mm7 \n\t" + "movq %0, %%mm0 \n\t" + "movq %0, %%mm1 \n\t" + "movq %3, %%mm2 \n\t" + "movq %3, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "psubw %%mm2, %%mm0 \n\t" + "psubw %%mm3, %%mm1 \n\t" /* mm0:mm1 = A - D as 16-bit words (low/high four pixels) */ + "movq %1, %%mm2 \n\t" + "movq %1, %%mm3 \n\t" + "movq %2, %%mm4 \n\t" + "movq %2, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpckhbw %%mm7, %%mm5 \n\t" + "psubw %%mm2, %%mm4 \n\t" + "psubw %%mm3, %%mm5 \n\t" + "psllw $2, %%mm4 \n\t" + "psllw $2, %%mm5 \n\t" + "paddw %%mm0, %%mm4 \n\t" + "paddw %%mm1, %%mm5 \n\t" /* mm4:mm5 = (A - D) + 4*(C - B) */ + "pxor %%mm6, %%mm6 \n\t" + "pcmpgtw %%mm4, %%mm6 \n\t" + "pcmpgtw %%mm5, %%mm7 \n\t" /* mm6:mm7 = all-ones words where the sum is negative (mm7 was zero) */ + "pxor %%mm6, %%mm4 \n\t" + "pxor %%mm7, %%mm5 \n\t" + "psubw %%mm6, %%mm4 \n\t" + "psubw %%mm7, %%mm5 \n\t" + "psrlw $3, %%mm4 \n\t" + "psrlw $3, %%mm5 \n\t" + "packuswb %%mm5, %%mm4 \n\t" //abs(d) + "packsswb %%mm7, %%mm6 \n\t" //sign(d) + "pxor %%mm7, %%mm7 \n\t" + "movd %4, %%mm2 \n\t" + "punpcklbw 
%%mm2, %%mm2 \n\t" + "punpcklbw %%mm2, %%mm2 \n\t" + "punpcklbw %%mm2, %%mm2 \n\t" //2*strength + "psubusb %%mm4, %%mm2 \n\t" // S(2*strength - abs(d)) + "movq %%mm2, %%mm3 \n\t" // S(2*strength - abs(d)) + "psubusb %%mm4, %%mm3 \n\t" // S(S(2*strength - abs(d)) - abs(d)) + "psubb %%mm3, %%mm2 \n\t" // MIN(abs(d), S(2*strength - abs(d))) + "movq %1, %%mm3 \n\t" + "movq %2, %%mm4 \n\t" + "pxor %%mm6, %%mm3 \n\t" + "pxor %%mm6, %%mm4 \n\t" + "paddusb %%mm2, %%mm3 \n\t" + "psubusb %%mm2, %%mm4 \n\t" + "pxor %%mm6, %%mm3 \n\t" + "pxor %%mm6, %%mm4 \n\t" + "movq %%mm3, %1 \n\t" + "movq %%mm4, %2 \n\t" /* rows B and C written back; the xor with the sign mask before and after flips the direction of the saturating add/subtract */ + "paddusb %%mm2, %%mm2 \n\t" + "packsswb %%mm1, %%mm0 \n\t" + "pcmpgtb %%mm0, %%mm7 \n\t" + "pxor %%mm7, %%mm0 \n\t" + "psubb %%mm7, %%mm0 \n\t" + "movq %%mm0, %%mm1 \n\t" + "psubusb %%mm2, %%mm0 \n\t" + "psubb %%mm0, %%mm1 \n\t" /* mm1 = MIN(abs(A - D), 2*d1) per byte */ + "pand %5, %%mm1 \n\t" + "psrlw $2, %%mm1 \n\t" /* per-byte >> 2: the 0xFC mask clears the low two bits so the word shift moves nothing into the neighbouring byte */ + "pxor %%mm7, %%mm1 \n\t" + "psubb %%mm7, %%mm1 \n\t" + "movq %0, %%mm3 \n\t" + "movq %3, %%mm4 \n\t" + "psubb %%mm1, %%mm3 \n\t" + "paddb %%mm1, %%mm4 \n\t" + "movq %%mm3, %0 \n\t" + "movq %%mm4, %3 \n\t" /* rows A and D written back */ + + : "+m" (*(uint64_t*)(src - 2*stride)), + "+m" (*(uint64_t*)(src - 1*stride)), + "+m" (*(uint64_t*)(src + 0*stride)), + "+m" (*(uint64_t*)(src + 1*stride)) + : "g" (2*strength), "m"(ff_pb_FC) + ); +} + #ifdef CONFIG_ENCODERS static int pix_norm1_mmx(uint8_t *pix, int line_size) { int tmp; @@ -1691,6 +1781,8 @@ c->pix_norm1 = pix_norm1_mmx; c->sse[0] = sse16_mmx; #endif //CONFIG_ENCODERS + + c->h263_v_loop_filter= h263_v_loop_filter_mmx; if (mm_flags & MM_MMXEXT) { c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;