comparison i386/dsputil_mmx.c @ 4749:7011f597e473 libavcodec

mmx 16-bit ssd. 2.3x faster svq1 encoding.
author lorenm
date Fri, 30 Mar 2007 19:15:31 +0000
parents 30261f4ed12d
children 231daf8387b1
comparison
equal deleted inserted replaced
4748:c6a2b573f259 4749:7011f597e473
1728 } 1728 }
1729 1729
1730 1730
1731 WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx) 1731 WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx)
1732 WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2) 1732 WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2)
1733
/**
 * MMX sum of squared differences between an int8_t array and an int16_t array.
 *
 * Accumulates (pix2[k] - pix1[k])^2 for k in [0, size) and returns the total.
 * Differences are formed with 16-bit wrapping subtraction (psubw) and the
 * running total is kept in 32-bit lanes with wrapping addition (paddd).
 *
 * @param pix1 signed 8-bit input; read 8 bytes at a time
 * @param pix2 signed 16-bit input; read 8 words (16 bytes) at a time
 * @param size number of elements. The loop walks from the end of the arrays
 *             towards the start in steps of 8 and always runs at least once,
 *             so this is expected to be a positive multiple of 8; nothing
 *             here checks that.
 * @return the low 32 bits of the accumulated sum of squares
 *
 * No emms is issued in this function.
 * NOTE(review): mm0-mm4 are modified but no clobbers are listed, and there is
 * no "memory" clobber although the asm reads through pix1/pix2 -- confirm this
 * matches the conventions of the rest of the file.
 */
1734 static int ssd_int8_vs_int16_mmx(int8_t *pix1, int16_t *pix2, int size){
1735 int sum;
1736 long i=size; // element index, counted down by 8 per iteration
1737 asm volatile(
1738 "pxor %%mm4, %%mm4 \n" // mm4 = 0: two 32-bit partial sums
1739 "1: \n"
1740 "sub $8, %0 \n" // i -= 8; the flags set here drive the jg at the bottom
1741 "movq (%2,%0), %%mm2 \n" // mm2 = pix1[i .. i+7] (8 bytes)
1742 "movq (%3,%0,2), %%mm0 \n" // mm0 = pix2[i .. i+3] (4 words)
1743 "movq 8(%3,%0,2), %%mm1 \n" // mm1 = pix2[i+4 .. i+7] (4 words)
1744 "punpckhbw %%mm2, %%mm3 \n" // high 4 bytes of mm2 land in the upper byte of each mm3 word; the lower bytes are stale mm3 data
1745 "punpcklbw %%mm2, %%mm2 \n" // low 4 bytes of mm2, each duplicated into both bytes of a word
1746 "psraw $8, %%mm3 \n" // arithmetic shift drops the lower byte -> sign-extended pix1[i+4 .. i+7]
1747 "psraw $8, %%mm2 \n" // -> sign-extended pix1[i .. i+3]
1748 "psubw %%mm3, %%mm1 \n" // mm1 = pix2[i+4..i+7] - pix1[i+4..i+7]
1749 "psubw %%mm2, %%mm0 \n" // mm0 = pix2[i..i+3] - pix1[i..i+3]
1750 "pmaddwd %%mm1, %%mm1 \n" // square each word and add adjacent pairs -> 2 dwords
1751 "pmaddwd %%mm0, %%mm0 \n"
1752 "paddd %%mm1, %%mm4 \n" // accumulate into the running totals
1753 "paddd %%mm0, %%mm4 \n"
1754 "jg 1b \n" // loop while i > 0; only MMX instructions sit between the sub and this branch
1755 "movq %%mm4, %%mm3 \n" // horizontal add: copy the accumulator,
1756 "psrlq $32, %%mm3 \n" // move its high dword down,
1757 "paddd %%mm3, %%mm4 \n" // and add so the low dword holds the full total
1758 "movd %%mm4, %1 \n" // sum = low dword of mm4
1759 :"+r"(i), "=r"(sum) // outputs: %0 = i (read/write), %1 = sum (write-only)
1760 :"r"(pix1), "r"(pix2) // inputs: %2 = pix1, %3 = pix2
1761 );
1762 return sum;
1763 }
1764
1733 #endif //CONFIG_ENCODERS 1765 #endif //CONFIG_ENCODERS
1734 1766
1735 #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d) 1767 #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d)
1736 #define put_no_rnd_pixels16_mmx(a,b,c,d) put_pixels16_mmx(a,b,c,d) 1768 #define put_no_rnd_pixels16_mmx(a,b,c,d) put_pixels16_mmx(a,b,c,d)
1737 1769
3213 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 3245 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
3214 c->try_8x8basis= try_8x8basis_mmx; 3246 c->try_8x8basis= try_8x8basis_mmx;
3215 } 3247 }
3216 c->add_8x8basis= add_8x8basis_mmx; 3248 c->add_8x8basis= add_8x8basis_mmx;
3217 3249
3250 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
3251
3218 #endif //CONFIG_ENCODERS 3252 #endif //CONFIG_ENCODERS
3219 3253
3220 c->h263_v_loop_filter= h263_v_loop_filter_mmx; 3254 c->h263_v_loop_filter= h263_v_loop_filter_mmx;
3221 c->h263_h_loop_filter= h263_h_loop_filter_mmx; 3255 c->h263_h_loop_filter= h263_h_loop_filter_mmx;
3222 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx; 3256 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx;