Mercurial > libavcodec.hg
changeset 3094:385fa360f618 libavcodec
11% faster put_h264_qpel16_v_lowpass_mmx2
author | lorenm |
---|---|
date | Tue, 07 Feb 2006 07:35:03 +0000 |
parents | 19f3dd63af7d |
children | 66987f98caae |
files | i386/h264dsp_mmx.c |
diffstat | 1 files changed, 27 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/i386/h264dsp_mmx.c Tue Feb 07 06:25:14 2006 +0000 +++ b/i386/h264dsp_mmx.c Tue Feb 07 07:35:03 2006 +0000 @@ -591,11 +591,11 @@ );\ }\ \ -static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - int h= 2;\ +static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ + int w= 2;\ src -= 2*srcStride;\ \ - while(h--){\ + while(w--){\ asm volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ @@ -626,8 +626,24 @@ : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ : "memory"\ );\ - src += 4-13*srcStride;\ - dst += 4-8*dstStride;\ + if(h==16){\ + asm volatile(\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ + QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ + QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ + QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + \ + : "+a"(src), "+c"(dst)\ + : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ + }\ + src += 4-(h+5)*srcStride;\ + dst += 4-h*dstStride;\ }\ }\ static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ @@ -731,13 +747,13 @@ dst += 8 - size*dstStride;\ }while(w--);\ }\ +\ +static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ +}\ static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ - src += 8*srcStride;\ - dst += 8*dstStride;\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ }\ \ static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\