comparison i386/mpegvideo_mmx.c @ 4989:0b1e761135cd libavcodec

SSE2 & SSSE3 versions of dct_quantize. Timings in cycles: core2: mmx2=154, sse2=73, ssse3=66; k8: mmx2=179, sse2=149; p4: mmx2=284, sse2=194.
author lorenm
date Sat, 12 May 2007 05:55:09 +0000
parents bbe0bc387a19
children d5ba514e3f4a
comparison
equal deleted inserted replaced
4988:689490842cf5 4989:0b1e761135cd
671 : "+r" (block), "+r" (sum), "+r" (offset) 671 : "+r" (block), "+r" (sum), "+r" (offset)
672 : "r"(block+64) 672 : "r"(block+64)
673 ); 673 );
674 } 674 }
675 675
676 #ifdef HAVE_SSSE3
677 #define HAVE_SSSE3_BAK
678 #endif
679 #undef HAVE_SSSE3
680
681 #undef HAVE_SSE2
676 #undef HAVE_MMX2 682 #undef HAVE_MMX2
677 #define RENAME(a) a ## _MMX 683 #define RENAME(a) a ## _MMX
678 #define RENAMEl(a) a ## _mmx 684 #define RENAMEl(a) a ## _mmx
679 #include "mpegvideo_mmx_template.c" 685 #include "mpegvideo_mmx_template.c"
680 686
683 #undef RENAMEl 689 #undef RENAMEl
684 #define RENAME(a) a ## _MMX2 690 #define RENAME(a) a ## _MMX2
685 #define RENAMEl(a) a ## _mmx2 691 #define RENAMEl(a) a ## _mmx2
686 #include "mpegvideo_mmx_template.c" 692 #include "mpegvideo_mmx_template.c"
687 693
694 #define HAVE_SSE2
688 #undef RENAME 695 #undef RENAME
689 #undef RENAMEl 696 #undef RENAMEl
690 #define RENAME(a) a ## _SSE2 697 #define RENAME(a) a ## _SSE2
691 #define RENAMEl(a) a ## _sse2 698 #define RENAMEl(a) a ## _sse2
692 #include "mpegvideo_mmx_template.c" 699 #include "mpegvideo_mmx_template.c"
700
701 #ifdef HAVE_SSSE3_BAK
702 #define HAVE_SSSE3
703 #undef RENAME
704 #undef RENAMEl
705 #define RENAME(a) a ## _SSSE3
706 #define RENAMEl(a) a ## _sse2
707 #include "mpegvideo_mmx_template.c"
708 #endif
693 709
694 void MPV_common_init_mmx(MpegEncContext *s) 710 void MPV_common_init_mmx(MpegEncContext *s)
695 { 711 {
696 if (mm_flags & MM_MMX) { 712 if (mm_flags & MM_MMX) {
697 const int dct_algo = s->avctx->dct_algo; 713 const int dct_algo = s->avctx->dct_algo;
711 } else { 727 } else {
712 s->denoise_dct= denoise_dct_mmx; 728 s->denoise_dct= denoise_dct_mmx;
713 } 729 }
714 730
715 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ 731 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
732 #ifdef HAVE_SSSE3
733 if(mm_flags & MM_SSSE3){
734 s->dct_quantize= dct_quantize_SSSE3;
735 } else
736 #endif
716 if(mm_flags & MM_SSE2){ 737 if(mm_flags & MM_SSE2){
717 s->dct_quantize= dct_quantize_SSE2; 738 s->dct_quantize= dct_quantize_SSE2;
718 } else if(mm_flags & MM_MMXEXT){ 739 } else if(mm_flags & MM_MMXEXT){
719 s->dct_quantize= dct_quantize_MMX2; 740 s->dct_quantize= dct_quantize_MMX2;
720 } else { 741 } else {