Mercurial > libavcodec.hg
comparison x86/dsputil_mmx.c @ 11499:2a4dc3c0b012 libavcodec
Move H264 dsputil functions into their own struct
This moves the H264-specific functions from DSPContext to the new
H264DSPContext. The code is made conditional on CONFIG_H264DSP
which is set by the codecs requiring it.
The qpel and chroma MC functions are not moved, as these are also used by
non-H264 code.
author | mru |
---|---|
date | Tue, 16 Mar 2010 01:17:00 +0000 |
parents | 0f0cd6b5791f |
children | f7281af560fe |
comparison legend: equal, deleted, inserted, replaced
11498:8e889ce0d616 | 11499:2a4dc3c0b012 |
---|---|
22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | 22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
23 */ | 23 */ |
24 | 24 |
25 #include "libavutil/x86_cpu.h" | 25 #include "libavutil/x86_cpu.h" |
26 #include "libavcodec/dsputil.h" | 26 #include "libavcodec/dsputil.h" |
27 #include "libavcodec/h264dsp.h" | |
27 #include "libavcodec/mpegvideo.h" | 28 #include "libavcodec/mpegvideo.h" |
28 #include "libavcodec/simple_idct.h" | 29 #include "libavcodec/simple_idct.h" |
29 #include "dsputil_mmx.h" | 30 #include "dsputil_mmx.h" |
30 #include "vp3dsp_mmx.h" | 31 #include "vp3dsp_mmx.h" |
31 #include "vp3dsp_sse2.h" | 32 #include "vp3dsp_sse2.h" |
2616 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_mmx_nornd; | 2617 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_mmx_nornd; |
2617 | 2618 |
2618 c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx; | 2619 c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx; |
2619 c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx; | 2620 c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx; |
2620 | 2621 |
2621 c->h264_idct_dc_add= | |
2622 c->h264_idct_add= ff_h264_idct_add_mmx; | |
2623 c->h264_idct8_dc_add= | |
2624 c->h264_idct8_add= ff_h264_idct8_add_mmx; | |
2625 | |
2626 c->h264_idct_add16 = ff_h264_idct_add16_mmx; | |
2627 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx; | |
2628 c->h264_idct_add8 = ff_h264_idct_add8_mmx; | |
2629 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx; | |
2630 | |
2631 if (CONFIG_VP6_DECODER) { | 2622 if (CONFIG_VP6_DECODER) { |
2632 c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; | 2623 c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; |
2633 } | 2624 } |
2634 | 2625 |
2635 if (mm_flags & FF_MM_MMX2) { | 2626 if (mm_flags & FF_MM_MMX2) { |
2646 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; | 2637 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; |
2647 | 2638 |
2648 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; | 2639 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; |
2649 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; | 2640 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; |
2650 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; | 2641 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; |
2651 | |
2652 c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2; | |
2653 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2; | |
2654 c->h264_idct_add16 = ff_h264_idct_add16_mmx2; | |
2655 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2; | |
2656 c->h264_idct_add8 = ff_h264_idct_add8_mmx2; | |
2657 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2; | |
2658 | 2642 |
2659 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 2643 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
2660 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; | 2644 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; |
2661 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; | 2645 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; |
2662 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; | 2646 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; |
2714 | 2698 |
2715 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd; | 2699 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd; |
2716 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2; | 2700 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2; |
2717 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2; | 2701 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2; |
2718 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2; | 2702 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2; |
2719 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2; | |
2720 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2; | |
2721 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2; | |
2722 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2; | |
2723 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2; | |
2724 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2; | |
2725 c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; | |
2726 | |
2727 c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; | |
2728 c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; | |
2729 c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2; | |
2730 c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2; | |
2731 c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2; | |
2732 c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2; | |
2733 c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2; | |
2734 c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2; | |
2735 | |
2736 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2; | |
2737 c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2; | |
2738 c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2; | |
2739 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2; | |
2740 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2; | |
2741 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2; | |
2742 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; | |
2743 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; | |
2744 | 2703 |
2745 #if HAVE_YASM | 2704 #if HAVE_YASM |
2746 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; | 2705 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; |
2747 #endif | 2706 #endif |
2748 #if HAVE_7REGS && HAVE_TEN_OPERANDS | 2707 #if HAVE_7REGS && HAVE_TEN_OPERANDS |
2823 c->put_pixels_tab[0][0] = put_pixels16_sse2; | 2782 c->put_pixels_tab[0][0] = put_pixels16_sse2; |
2824 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; | 2783 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; |
2825 H264_QPEL_FUNCS(0, 0, sse2); | 2784 H264_QPEL_FUNCS(0, 0, sse2); |
2826 } | 2785 } |
2827 if(mm_flags & FF_MM_SSE2){ | 2786 if(mm_flags & FF_MM_SSE2){ |
2828 c->h264_idct8_add = ff_h264_idct8_add_sse2; | |
2829 c->h264_idct8_add4= ff_h264_idct8_add4_sse2; | |
2830 | |
2831 H264_QPEL_FUNCS(0, 1, sse2); | 2787 H264_QPEL_FUNCS(0, 1, sse2); |
2832 H264_QPEL_FUNCS(0, 2, sse2); | 2788 H264_QPEL_FUNCS(0, 2, sse2); |
2833 H264_QPEL_FUNCS(0, 3, sse2); | 2789 H264_QPEL_FUNCS(0, 3, sse2); |
2834 H264_QPEL_FUNCS(1, 1, sse2); | 2790 H264_QPEL_FUNCS(1, 1, sse2); |
2835 H264_QPEL_FUNCS(1, 2, sse2); | 2791 H264_QPEL_FUNCS(1, 2, sse2); |
2869 #if HAVE_YASM | 2825 #if HAVE_YASM |
2870 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; | 2826 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; |
2871 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe | 2827 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe |
2872 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; | 2828 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; |
2873 #endif | 2829 #endif |
2874 } | |
2875 #endif | |
2876 | |
2877 #if CONFIG_GPL && HAVE_YASM | |
2878 if (mm_flags & FF_MM_MMX2){ | |
2879 #if ARCH_X86_32 | |
2880 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext; | |
2881 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext; | |
2882 #endif | |
2883 if( mm_flags&FF_MM_SSE2 ){ | |
2884 #if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110 | |
2885 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2; | |
2886 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2; | |
2887 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2; | |
2888 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2; | |
2889 #endif | |
2890 c->h264_idct_add16 = ff_h264_idct_add16_sse2; | |
2891 c->h264_idct_add8 = ff_h264_idct_add8_sse2; | |
2892 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2; | |
2893 } | |
2894 } | 2830 } |
2895 #endif | 2831 #endif |
2896 | 2832 |
2897 if(mm_flags & FF_MM_3DNOW){ | 2833 if(mm_flags & FF_MM_3DNOW){ |
2898 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; | 2834 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; |
2981 | 2917 |
2982 //av_fdct = just_return; | 2918 //av_fdct = just_return; |
2983 //ff_idct = just_return; | 2919 //ff_idct = just_return; |
2984 #endif | 2920 #endif |
2985 } | 2921 } |
2922 | |
2923 #if CONFIG_H264DSP | |
2924 void ff_h264dsp_init_x86(H264DSPContext *c) | |
2925 { | |
2926 mm_flags = mm_support(); | |
2927 | |
2928 if (mm_flags & FF_MM_MMX) { | |
2929 c->h264_idct_dc_add= | |
2930 c->h264_idct_add= ff_h264_idct_add_mmx; | |
2931 c->h264_idct8_dc_add= | |
2932 c->h264_idct8_add= ff_h264_idct8_add_mmx; | |
2933 | |
2934 c->h264_idct_add16 = ff_h264_idct_add16_mmx; | |
2935 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx; | |
2936 c->h264_idct_add8 = ff_h264_idct_add8_mmx; | |
2937 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx; | |
2938 | |
2939 if (mm_flags & FF_MM_MMX2) { | |
2940 c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2; | |
2941 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2; | |
2942 c->h264_idct_add16 = ff_h264_idct_add16_mmx2; | |
2943 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2; | |
2944 c->h264_idct_add8 = ff_h264_idct_add8_mmx2; | |
2945 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2; | |
2946 | |
2947 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2; | |
2948 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2; | |
2949 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2; | |
2950 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2; | |
2951 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2; | |
2952 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2; | |
2953 c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; | |
2954 | |
2955 c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; | |
2956 c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; | |
2957 c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2; | |
2958 c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2; | |
2959 c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2; | |
2960 c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2; | |
2961 c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2; | |
2962 c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2; | |
2963 | |
2964 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2; | |
2965 c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2; | |
2966 c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2; | |
2967 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2; | |
2968 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2; | |
2969 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2; | |
2970 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; | |
2971 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; | |
2972 } | |
2973 if(mm_flags & FF_MM_SSE2){ | |
2974 c->h264_idct8_add = ff_h264_idct8_add_sse2; | |
2975 c->h264_idct8_add4= ff_h264_idct8_add4_sse2; | |
2976 } | |
2977 | |
2978 #if CONFIG_GPL && HAVE_YASM | |
2979 if (mm_flags & FF_MM_MMX2){ | |
2980 #if ARCH_X86_32 | |
2981 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext; | |
2982 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext; | |
2983 #endif | |
2984 if( mm_flags&FF_MM_SSE2 ){ | |
2985 #if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110 | |
2986 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2; | |
2987 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2; | |
2988 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2; | |
2989 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2; | |
2990 #endif | |
2991 c->h264_idct_add16 = ff_h264_idct_add16_sse2; | |
2992 c->h264_idct_add8 = ff_h264_idct_add8_sse2; | |
2993 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2; | |
2994 } | |
2995 } | |
2996 #endif | |
2997 } | |
2998 } | |
2999 #endif /* CONFIG_H264DSP */ |