comparison x86/dsputil_mmx.c @ 11499:2a4dc3c0b012 libavcodec

Move H264 dsputil functions into their own struct. This moves the H264-specific functions from DSPContext to the new H264DSPContext. The code is made conditional on CONFIG_H264DSP, which is set by the codecs requiring it. The qpel and chroma MC functions are not moved, as these are used by non-H264 code.
author mru
date Tue, 16 Mar 2010 01:17:00 +0000
parents 0f0cd6b5791f
children f7281af560fe
comparison
equal deleted inserted replaced
11498:8e889ce0d616 11499:2a4dc3c0b012
22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> 22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
23 */ 23 */
24 24
25 #include "libavutil/x86_cpu.h" 25 #include "libavutil/x86_cpu.h"
26 #include "libavcodec/dsputil.h" 26 #include "libavcodec/dsputil.h"
27 #include "libavcodec/h264dsp.h"
27 #include "libavcodec/mpegvideo.h" 28 #include "libavcodec/mpegvideo.h"
28 #include "libavcodec/simple_idct.h" 29 #include "libavcodec/simple_idct.h"
29 #include "dsputil_mmx.h" 30 #include "dsputil_mmx.h"
30 #include "vp3dsp_mmx.h" 31 #include "vp3dsp_mmx.h"
31 #include "vp3dsp_sse2.h" 32 #include "vp3dsp_sse2.h"
2616 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_mmx_nornd; 2617 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_mmx_nornd;
2617 2618
2618 c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx; 2619 c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx;
2619 c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx; 2620 c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx;
2620 2621
2621 c->h264_idct_dc_add=
2622 c->h264_idct_add= ff_h264_idct_add_mmx;
2623 c->h264_idct8_dc_add=
2624 c->h264_idct8_add= ff_h264_idct8_add_mmx;
2625
2626 c->h264_idct_add16 = ff_h264_idct_add16_mmx;
2627 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx;
2628 c->h264_idct_add8 = ff_h264_idct_add8_mmx;
2629 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
2630
2631 if (CONFIG_VP6_DECODER) { 2622 if (CONFIG_VP6_DECODER) {
2632 c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; 2623 c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
2633 } 2624 }
2634 2625
2635 if (mm_flags & FF_MM_MMX2) { 2626 if (mm_flags & FF_MM_MMX2) {
2646 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; 2637 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
2647 2638
2648 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; 2639 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
2649 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; 2640 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
2650 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; 2641 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
2651
2652 c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
2653 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
2654 c->h264_idct_add16 = ff_h264_idct_add16_mmx2;
2655 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;
2656 c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
2657 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
2658 2642
2659 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 2643 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2660 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; 2644 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
2661 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; 2645 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
2662 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; 2646 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
2714 2698
2715 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd; 2699 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd;
2716 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2; 2700 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2;
2717 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2; 2701 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2;
2718 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2; 2702 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2;
2719 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
2720 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
2721 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
2722 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
2723 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
2724 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
2725 c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
2726
2727 c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
2728 c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
2729 c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
2730 c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
2731 c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
2732 c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
2733 c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
2734 c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
2735
2736 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
2737 c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
2738 c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
2739 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
2740 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
2741 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
2742 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
2743 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
2744 2703
2745 #if HAVE_YASM 2704 #if HAVE_YASM
2746 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; 2705 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
2747 #endif 2706 #endif
2748 #if HAVE_7REGS && HAVE_TEN_OPERANDS 2707 #if HAVE_7REGS && HAVE_TEN_OPERANDS
2823 c->put_pixels_tab[0][0] = put_pixels16_sse2; 2782 c->put_pixels_tab[0][0] = put_pixels16_sse2;
2824 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; 2783 c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
2825 H264_QPEL_FUNCS(0, 0, sse2); 2784 H264_QPEL_FUNCS(0, 0, sse2);
2826 } 2785 }
2827 if(mm_flags & FF_MM_SSE2){ 2786 if(mm_flags & FF_MM_SSE2){
2828 c->h264_idct8_add = ff_h264_idct8_add_sse2;
2829 c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
2830
2831 H264_QPEL_FUNCS(0, 1, sse2); 2787 H264_QPEL_FUNCS(0, 1, sse2);
2832 H264_QPEL_FUNCS(0, 2, sse2); 2788 H264_QPEL_FUNCS(0, 2, sse2);
2833 H264_QPEL_FUNCS(0, 3, sse2); 2789 H264_QPEL_FUNCS(0, 3, sse2);
2834 H264_QPEL_FUNCS(1, 1, sse2); 2790 H264_QPEL_FUNCS(1, 1, sse2);
2835 H264_QPEL_FUNCS(1, 2, sse2); 2791 H264_QPEL_FUNCS(1, 2, sse2);
2869 #if HAVE_YASM 2825 #if HAVE_YASM
2870 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; 2826 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
2871 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe 2827 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe
2872 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; 2828 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
2873 #endif 2829 #endif
2874 }
2875 #endif
2876
2877 #if CONFIG_GPL && HAVE_YASM
2878 if (mm_flags & FF_MM_MMX2){
2879 #if ARCH_X86_32
2880 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
2881 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
2882 #endif
2883 if( mm_flags&FF_MM_SSE2 ){
2884 #if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
2885 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
2886 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
2887 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
2888 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
2889 #endif
2890 c->h264_idct_add16 = ff_h264_idct_add16_sse2;
2891 c->h264_idct_add8 = ff_h264_idct_add8_sse2;
2892 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
2893 }
2894 } 2830 }
2895 #endif 2831 #endif
2896 2832
2897 if(mm_flags & FF_MM_3DNOW){ 2833 if(mm_flags & FF_MM_3DNOW){
2898 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; 2834 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
2981 2917
2982 //av_fdct = just_return; 2918 //av_fdct = just_return;
2983 //ff_idct = just_return; 2919 //ff_idct = just_return;
2984 #endif 2920 #endif
2985 } 2921 }
2922
2923 #if CONFIG_H264DSP
/**
 * Fill the function pointers of an H264DSPContext with x86 implementations.
 *
 * The value returned by mm_support() is tested against the FF_MM_* bits and
 * the matching routines are installed. Assignments are ordered from the
 * baseline set to the more specialised sets, so a later assignment
 * deliberately overrides an earlier one for the same pointer.
 *
 * Nothing in *c is written unless FF_MM_MMX is set; every other check is
 * nested inside that test.
 *
 * @param c context whose function pointers are filled in
 */
2924 void ff_h264dsp_init_x86(H264DSPContext *c)
2925 {
/* mm_flags is not declared in this function, so this stores into a variable
 * declared elsewhere (NOTE(review): presumably file-scope/global -- confirm). */
2926 mm_flags = mm_support();
2927
2928 if (mm_flags & FF_MM_MMX) {
/* Baseline MMX set: the *_dc_add pointers are chain-assigned the same
 * routine as the corresponding full IDCT-add pointer. */
2929 c->h264_idct_dc_add=
2930 c->h264_idct_add= ff_h264_idct_add_mmx;
2931 c->h264_idct8_dc_add=
2932 c->h264_idct8_add= ff_h264_idct8_add_mmx;
2933
2934 c->h264_idct_add16 = ff_h264_idct_add16_mmx;
2935 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx;
2936 c->h264_idct_add8 = ff_h264_idct_add8_mmx;
2937 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
2938
2939 if (mm_flags & FF_MM_MMX2) {
/* MMX2 overrides: dedicated dc_add routines replace the chained ones above,
 * and the add16/add4/add8/add16intra pointers get MMX2 versions. */
2940 c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
2941 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
2942 c->h264_idct_add16 = ff_h264_idct_add16_mmx2;
2943 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;
2944 c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
2945 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
2946
/* Loop-filter pointers; they are first assigned here (no plain-MMX versions
 * are installed above). */
2947 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
2948 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
2949 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
2950 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
2951 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
2952 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
2953 c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
2954
/* Weight table, entries 0..7; the routine names give the block size each
 * entry is wired to (16x16 down to 4x2). */
2955 c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
2956 c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
2957 c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
2958 c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
2959 c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
2960 c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
2961 c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
2962 c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
2963
/* Biweight table, same index-to-size layout as the weight table above. */
2964 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
2965 c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
2966 c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
2967 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
2968 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
2969 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
2970 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
2971 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
2972 }
/* This SSE2 check sits inside the FF_MM_MMX block but outside the FF_MM_MMX2
 * block; it overrides the 8x8 IDCT-add pointers set earlier. */
2973 if(mm_flags & FF_MM_SSE2){
2974 c->h264_idct8_add = ff_h264_idct8_add_sse2;
2975 c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
2976 }
2977
/* The ff_x264_* deblocking routines and the SSE2 idct_add16/add8/add16intra
 * routines are only compiled in when both CONFIG_GPL and HAVE_YASM are set. */
2978 #if CONFIG_GPL && HAVE_YASM
2979 if (mm_flags & FF_MM_MMX2){
/* The mmxext intra luma deblock routines are installed on 32-bit x86 builds
 * only. */
2980 #if ARCH_X86_32
2981 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
2982 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
2983 #endif
/* Unlike the SSE2 check above, this one additionally requires FF_MM_MMX2. */
2984 if( mm_flags&FF_MM_SSE2 ){
/* The SSE2 luma deblock routines are left out when building for 32-bit with
 * ICC version 1110 or older (the reason is not stated in this code). */
2985 #if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
2986 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
2987 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
2988 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
2989 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
2990 #endif
/* These override the MMX2 versions assigned earlier in this function. */
2991 c->h264_idct_add16 = ff_h264_idct_add16_sse2;
2992 c->h264_idct_add8 = ff_h264_idct_add8_sse2;
2993 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
2994 }
2995 }
2996 #endif
2997 }
2998 }
2999 #endif /* CONFIG_H264DSP */