Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 3536:545a15c19c91 libavcodec
sse & sse2 implementations of vorbis channel coupling.
9% faster vorbis (on a K8).
author | lorenm |
---|---|
date | Thu, 03 Aug 2006 03:18:47 +0000 |
parents | 419409926166 |
children | 3fbddeb13686 |
comparison
equal
deleted
inserted
replaced
3535:a14c98a0ca3d | 3536:545a15c19c91 |
---|---|
2709 ff_idct_xvid_mmx2 (block); | 2709 ff_idct_xvid_mmx2 (block); |
2710 add_pixels_clamped_mmx(block, dest, line_size); | 2710 add_pixels_clamped_mmx(block, dest, line_size); |
2711 } | 2711 } |
2712 #endif | 2712 #endif |
2713 | 2713 |
// In-place Vorbis channel-coupling update of mag[] and ang[], two floats per loop iteration, done in the 64-bit mm registers. | |
// Per element, with m = mag[i], a = ang[i], flip = 0x80000000 if m >= 0.0 else 0, and t = a with its bits XORed by flip: | |
//   ang[i] = m + (a >= 0.0 ? t : 0)   and   mag[i] = m - (a >= 0.0 ? 0 : t) | |
// mag, ang: float arrays, both read and overwritten. blocksize: element count; the loop visits i = 0, 2, 4, ... while i < blocksize. | |
// NOTE(review): pfcmpge/pfadd/pfsub are 3DNow! mnemonics, yet this function is named _sse and this page shows it installed under (mm_flags & MM_SSE) - confirm the intended CPU flag. | |
// NOTE(review): every iteration loads and stores 8 bytes at mag[i] and ang[i]; presumably blocksize is even - verify against callers. | |
2714 static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) | |
2715 { | |
2716 int i; | |
// Zero mm7 once; the loop body compares against it. No clobber is declared, so this relies on mm7 surviving between the separate asm statements. | |
2717 asm volatile("pxor %%mm7, %%mm7":); | |
2718 for(i=0; i<blocksize; i+=2) { | |
2719 asm volatile( | |
// AT&T operand order: source first, destination last. mm0 = m pair, mm1 = a pair, mm2/mm3 = working copies that become compare masks. | |
2720 "movq %0, %%mm0 \n\t" | |
2721 "movq %1, %%mm1 \n\t" | |
2722 "movq %%mm0, %%mm2 \n\t" | |
2723 "movq %%mm1, %%mm3 \n\t" | |
2724 "pfcmpge %%mm7, %%mm2 \n\t" // mm2 = (m >= 0.0) ? all-ones : 0 | |
2725 "pfcmpge %%mm7, %%mm3 \n\t" // mm3 = (a >= 0.0) ? all-ones : 0 | |
2726 "pslld $31, %%mm2 \n\t" // shift the mask so only bit 31 (the float sign bit) remains: flip | |
// mm1 = t = a ^ flip; mm3 keeps t where a >= 0.0, mm4 keeps t where the a-compare was false. | |
2727 "pxor %%mm2, %%mm1 \n\t" | |
2728 "movq %%mm3, %%mm4 \n\t" | |
2729 "pand %%mm1, %%mm3 \n\t" | |
2730 "pandn %%mm1, %%mm4 \n\t" | |
2731 "pfadd %%mm0, %%mm3 \n\t" // new ang = m + ((a >= 0.0) ? t : 0) | |
2732 "pfsub %%mm4, %%mm0 \n\t" // new mag = m - ((a >= 0.0) ? 0 : t) | |
2733 "movq %%mm3, %1 \n\t" | |
2734 "movq %%mm0, %0 \n\t" | |
// The operands name only mag[i] and ang[i] (one float each) although movq moves two floats; a "memory" clobber is listed as well. | |
2735 :"+m"(mag[i]), "+m"(ang[i]) | |
2736 ::"memory" | |
2737 ); | |
2738 } | |
// Leave MMX state before returning so later x87 floating-point code can run. | |
2739 asm volatile("emms"); | |
2740 } | |
// In-place Vorbis channel-coupling update of mag[] and ang[], four floats per loop iteration, done in the 128-bit xmm registers. | |
// Per element, with m = mag[i], a = ang[i], flip = 0x80000000 if 0.0 <= m else 0, and t = a with its bits XORed by flip: | |
//   ang[i] = m + (0.0 <= a ? t : 0)   and   mag[i] = m - (0.0 <= a ? 0 : t) | |
// mag, ang: float arrays, both read and overwritten. blocksize: element count; the loop visits i = 0, 4, 8, ... while i < blocksize. | |
// NOTE(review): movaps with a memory operand requires a 16-byte aligned address; presumably mag and ang are aligned that way - verify against callers. | |
// NOTE(review): every iteration loads and stores 16 bytes at mag[i] and ang[i]; presumably blocksize is a multiple of 4 - verify against callers. | |
// NOTE(review): xmm0-xmm4 are written but not listed as clobbers - confirm this is safe with the compilers in use. | |
2741 static void vorbis_inverse_coupling_sse2(float *mag, float *ang, int blocksize) | |
2742 { | |
2743 int i; | |
2744 for(i=0; i<blocksize; i+=4) { | |
2745 asm volatile( | |
// AT&T operand order: source first, destination last. xmm0 = four m values, xmm1 = four a values, xmm2/xmm3 are zeroed to serve as the 0.0 side of the compares. | |
2746 "movaps %0, %%xmm0 \n\t" | |
2747 "movaps %1, %%xmm1 \n\t" | |
2748 "pxor %%xmm2, %%xmm2 \n\t" | |
2749 "pxor %%xmm3, %%xmm3 \n\t" | |
2750 "cmpleps %%xmm0, %%xmm2 \n\t" // xmm2 = (0.0 <= m) ? all-ones : 0 | |
2751 "cmpleps %%xmm1, %%xmm3 \n\t" // xmm3 = (0.0 <= a) ? all-ones : 0 | |
2752 "pslld $31, %%xmm2 \n\t" // shift the mask so only bit 31 (the float sign bit) remains: flip | |
// xmm1 = t = a ^ flip; xmm3 keeps t where 0.0 <= a, xmm4 keeps t where that compare was false. | |
2753 "pxor %%xmm2, %%xmm1 \n\t" | |
2754 "movaps %%xmm3, %%xmm4 \n\t" | |
2755 "pand %%xmm1, %%xmm3 \n\t" | |
2756 "pandn %%xmm1, %%xmm4 \n\t" | |
2757 "addps %%xmm0, %%xmm3 \n\t" // new ang = m + ((0.0 <= a) ? t : 0) | |
2758 "subps %%xmm4, %%xmm0 \n\t" // new mag = m - ((0.0 <= a) ? 0 : t) | |
2759 "movaps %%xmm3, %1 \n\t" | |
2760 "movaps %%xmm0, %0 \n\t" | |
// The operands name only mag[i] and ang[i] (one float each) although movaps moves four floats; a "memory" clobber is listed as well. | |
2761 :"+m"(mag[i]), "+m"(ang[i]) | |
2762 ::"memory" | |
2763 ); | |
2764 } | |
2765 } | |
2766 | |
2714 #ifdef CONFIG_SNOW_ENCODER | 2767 #ifdef CONFIG_SNOW_ENCODER |
2715 extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width); | 2768 extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width); |
2716 extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width); | 2769 extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width); |
2717 extern void ff_snow_vertical_compose97i_sse2(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | 2770 extern void ff_snow_vertical_compose97i_sse2(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); |
2718 extern void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | 2771 extern void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); |
3135 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; | 3188 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; |
3136 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; | 3189 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; |
3137 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; | 3190 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; |
3138 } | 3191 } |
3139 #endif | 3192 #endif |
3193 | |
3194 if(mm_flags & MM_SSE2) | |
3195 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse2; | |
3196 else if(mm_flags & MM_SSE) | |
3197 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; | |
3140 } | 3198 } |
3141 | 3199 |
3142 #ifdef CONFIG_ENCODERS | 3200 #ifdef CONFIG_ENCODERS |
3143 dsputil_init_pix_mmx(c, avctx); | 3201 dsputil_init_pix_mmx(c, avctx); |
3144 #endif //CONFIG_ENCODERS | 3202 #endif //CONFIG_ENCODERS |