comparison x86/vc1dsp_mmx.c @ 12144:846779f6b164 libavcodec

MMX/SSE VC1 loop filter
author conrad
date Sun, 11 Jul 2010 22:53:01 +0000
parents fdafbcef52f5
children d38e8565ba05
comparison
equal deleted inserted replaced
12143:fa452b243aa6 12144:846779f6b164
687 "+m"(*(uint32_t*)(dest+2*linesize)), 687 "+m"(*(uint32_t*)(dest+2*linesize)),
688 "+m"(*(uint32_t*)(dest+3*linesize)) 688 "+m"(*(uint32_t*)(dest+3*linesize))
689 ); 689 );
690 } 690 }
691 691
/*
 * Loop filter glue for one instruction-set suffix EXT.
 *
 * LOOP_FILTER_PROTOS(EXT) declares the external 4- and 8-pixel vertical and
 * horizontal loop filters named with that suffix.
 * LOOP_FILTER16(DIR, EXT, STEP) defines a static 16-pixel filter that calls
 * the matching 8-pixel filter twice: first at src, then at src + STEP.
 * LOOP_FILTER(EXT) emits the prototypes followed by both 16-pixel wrappers.
 */
#define LOOP_FILTER_PROTOS(EXT) \
void ff_vc1_v_loop_filter4_ ## EXT(uint8_t *src, int stride, int pq); \
void ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, int stride, int pq); \
void ff_vc1_v_loop_filter8_ ## EXT(uint8_t *src, int stride, int pq); \
void ff_vc1_h_loop_filter8_ ## EXT(uint8_t *src, int stride, int pq);

#define LOOP_FILTER16(DIR, EXT, STEP) \
static void vc1_ ## DIR ## _loop_filter16_ ## EXT(uint8_t *src, int stride, int pq) \
{ \
    ff_vc1_ ## DIR ## _loop_filter8_ ## EXT(src, stride, pq); \
    ff_vc1_ ## DIR ## _loop_filter8_ ## EXT(src + (STEP), stride, pq); \
}

/* The vertical wrapper advances by 8 bytes, the horizontal one by 8 lines. */
#define LOOP_FILTER(EXT) \
LOOP_FILTER_PROTOS(EXT) \
LOOP_FILTER16(v, EXT, 8) \
LOOP_FILTER16(h, EXT, 8 * stride)
709
#if HAVE_YASM
/* Declare the 4-/8-pixel filters and build the 16-pixel wrappers for each
 * suffix handled through LOOP_FILTER. */
LOOP_FILTER(mmx)
LOOP_FILTER(mmx2)
LOOP_FILTER(sse2)
LOOP_FILTER(ssse3)

/* For SSE4 only the horizontal 8-pixel filter is declared here, so its
 * 16-pixel wrapper is written out by hand instead of using LOOP_FILTER. */
void ff_vc1_h_loop_filter8_sse4(uint8_t *src, int stride, int pq);

/**
 * Horizontal 16-pixel loop filter: runs the SSE4 8-pixel filter on src and
 * then again starting 8 lines (8 * stride bytes) further on.
 */
static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
{
    uint8_t *const second_half = src + 8 * stride;

    ff_vc1_h_loop_filter8_sse4(src, stride, pq);
    ff_vc1_h_loop_filter8_sse4(second_half, stride, pq);
}
#endif
724
692 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) { 725 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
693 mm_flags = mm_support(); 726 mm_flags = mm_support();
694 727
695 dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx; 728 dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
696 dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx; 729 dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
736 dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmx2; 769 dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmx2;
737 dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmx2; 770 dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmx2;
738 dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmx2; 771 dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmx2;
739 dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmx2; 772 dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmx2;
740 } 773 }
741 } 774
/*
 * Point all six loop filter entries of dsp at the implementations carrying
 * the suffix EXT. The 4- and 8-pixel filters use the ff_ prefixed names, the
 * 16-pixel ones use the unprefixed wrapper names.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * stays correct inside an unbraced if/else.
 */
#define ASSIGN_LF(EXT) \
    do { \
        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT; \
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT; \
    } while (0)
782
#if HAVE_YASM
    /*
     * Install the loop filters. The checks run one after another and each
     * matching block overwrites pointers set by an earlier one, so the last
     * matching flag decides which implementation is used.
     *
     * A stray unconditional "return;" used to follow the MMX block, making
     * every block below unreachable; it has been removed.
     */
    if (mm_flags & FF_MM_MMX) {
        ASSIGN_LF(mmx);
    }
    if (mm_flags & FF_MM_MMX2) {
        ASSIGN_LF(mmx2);
    }
    if (mm_flags & FF_MM_SSE2) {
        /* Only the 8- and 16-pixel entries are replaced here; the 4-pixel
         * entries keep whatever was assigned above. */
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
    }
    if (mm_flags & FF_MM_SSSE3) {
        ASSIGN_LF(ssse3);
    }
    if (mm_flags & FF_MM_SSE4) {
        /* Only the horizontal 8- and 16-pixel entries are replaced here. */
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
    }
#endif
805 }