comparison i386/dsputil_mmx.c @ 8104:0d108ec85620 libavcodec

Remove duplicated MM_* macros for CPU capabilities from dsputil.h. Add missing one for FF_MM_ALTIVEC to avcodec.h. Rename all the occurrences of MM_* to the corresponding FF_MM_*.
author rathann
date Mon, 03 Nov 2008 18:08:00 +0000
parents 915bc657348f
children cf4d575b1982
comparison
equal deleted inserted replaced
8103:069d7a8e2e75 8104:0d108ec85620
2496 mm_flags &= ~(avctx->dsp_mask & 0xffff); 2496 mm_flags &= ~(avctx->dsp_mask & 0xffff);
2497 } 2497 }
2498 2498
2499 #if 0 2499 #if 0
2500 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:"); 2500 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:");
2501 if (mm_flags & MM_MMX) 2501 if (mm_flags & FF_MM_MMX)
2502 av_log(avctx, AV_LOG_INFO, " mmx"); 2502 av_log(avctx, AV_LOG_INFO, " mmx");
2503 if (mm_flags & MM_MMXEXT) 2503 if (mm_flags & FF_MM_MMXEXT)
2504 av_log(avctx, AV_LOG_INFO, " mmxext"); 2504 av_log(avctx, AV_LOG_INFO, " mmxext");
2505 if (mm_flags & MM_3DNOW) 2505 if (mm_flags & FF_MM_3DNOW)
2506 av_log(avctx, AV_LOG_INFO, " 3dnow"); 2506 av_log(avctx, AV_LOG_INFO, " 3dnow");
2507 if (mm_flags & MM_SSE) 2507 if (mm_flags & FF_MM_SSE)
2508 av_log(avctx, AV_LOG_INFO, " sse"); 2508 av_log(avctx, AV_LOG_INFO, " sse");
2509 if (mm_flags & MM_SSE2) 2509 if (mm_flags & FF_MM_SSE2)
2510 av_log(avctx, AV_LOG_INFO, " sse2"); 2510 av_log(avctx, AV_LOG_INFO, " sse2");
2511 av_log(avctx, AV_LOG_INFO, "\n"); 2511 av_log(avctx, AV_LOG_INFO, "\n");
2512 #endif 2512 #endif
2513 2513
2514 if (mm_flags & MM_MMX) { 2514 if (mm_flags & FF_MM_MMX) {
2515 const int idct_algo= avctx->idct_algo; 2515 const int idct_algo= avctx->idct_algo;
2516 2516
2517 if(avctx->lowres==0){ 2517 if(avctx->lowres==0){
2518 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ 2518 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
2519 c->idct_put= ff_simple_idct_put_mmx; 2519 c->idct_put= ff_simple_idct_put_mmx;
2520 c->idct_add= ff_simple_idct_add_mmx; 2520 c->idct_add= ff_simple_idct_add_mmx;
2521 c->idct = ff_simple_idct_mmx; 2521 c->idct = ff_simple_idct_mmx;
2522 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; 2522 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
2523 #ifdef CONFIG_GPL 2523 #ifdef CONFIG_GPL
2524 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ 2524 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
2525 if(mm_flags & MM_MMXEXT){ 2525 if(mm_flags & FF_MM_MMXEXT){
2526 c->idct_put= ff_libmpeg2mmx2_idct_put; 2526 c->idct_put= ff_libmpeg2mmx2_idct_put;
2527 c->idct_add= ff_libmpeg2mmx2_idct_add; 2527 c->idct_add= ff_libmpeg2mmx2_idct_add;
2528 c->idct = ff_mmxext_idct; 2528 c->idct = ff_mmxext_idct;
2529 }else{ 2529 }else{
2530 c->idct_put= ff_libmpeg2mmx_idct_put; 2530 c->idct_put= ff_libmpeg2mmx_idct_put;
2533 } 2533 }
2534 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; 2534 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2535 #endif 2535 #endif
2536 }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER) && 2536 }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER) &&
2537 idct_algo==FF_IDCT_VP3){ 2537 idct_algo==FF_IDCT_VP3){
2538 if(mm_flags & MM_SSE2){ 2538 if(mm_flags & FF_MM_SSE2){
2539 c->idct_put= ff_vp3_idct_put_sse2; 2539 c->idct_put= ff_vp3_idct_put_sse2;
2540 c->idct_add= ff_vp3_idct_add_sse2; 2540 c->idct_add= ff_vp3_idct_add_sse2;
2541 c->idct = ff_vp3_idct_sse2; 2541 c->idct = ff_vp3_idct_sse2;
2542 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; 2542 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
2543 }else{ 2543 }else{
2547 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; 2547 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM;
2548 } 2548 }
2549 }else if(idct_algo==FF_IDCT_CAVS){ 2549 }else if(idct_algo==FF_IDCT_CAVS){
2550 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; 2550 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
2551 }else if(idct_algo==FF_IDCT_XVIDMMX){ 2551 }else if(idct_algo==FF_IDCT_XVIDMMX){
2552 if(mm_flags & MM_SSE2){ 2552 if(mm_flags & FF_MM_SSE2){
2553 c->idct_put= ff_idct_xvid_sse2_put; 2553 c->idct_put= ff_idct_xvid_sse2_put;
2554 c->idct_add= ff_idct_xvid_sse2_add; 2554 c->idct_add= ff_idct_xvid_sse2_add;
2555 c->idct = ff_idct_xvid_sse2; 2555 c->idct = ff_idct_xvid_sse2;
2556 c->idct_permutation_type= FF_SSE2_IDCT_PERM; 2556 c->idct_permutation_type= FF_SSE2_IDCT_PERM;
2557 }else if(mm_flags & MM_MMXEXT){ 2557 }else if(mm_flags & FF_MM_MMXEXT){
2558 c->idct_put= ff_idct_xvid_mmx2_put; 2558 c->idct_put= ff_idct_xvid_mmx2_put;
2559 c->idct_add= ff_idct_xvid_mmx2_add; 2559 c->idct_add= ff_idct_xvid_mmx2_add;
2560 c->idct = ff_idct_xvid_mmx2; 2560 c->idct = ff_idct_xvid_mmx2;
2561 }else{ 2561 }else{
2562 c->idct_put= ff_idct_xvid_mmx_put; 2562 c->idct_put= ff_idct_xvid_mmx_put;
2603 2603
2604 c->h264_idct_dc_add= 2604 c->h264_idct_dc_add=
2605 c->h264_idct_add= ff_h264_idct_add_mmx; 2605 c->h264_idct_add= ff_h264_idct_add_mmx;
2606 c->h264_idct8_dc_add= 2606 c->h264_idct8_dc_add=
2607 c->h264_idct8_add= ff_h264_idct8_add_mmx; 2607 c->h264_idct8_add= ff_h264_idct8_add_mmx;
2608 if (mm_flags & MM_SSE2) 2608 if (mm_flags & FF_MM_SSE2)
2609 c->h264_idct8_add= ff_h264_idct8_add_sse2; 2609 c->h264_idct8_add= ff_h264_idct8_add_sse2;
2610 2610
2611 if (mm_flags & MM_MMXEXT) { 2611 if (mm_flags & FF_MM_MMXEXT) {
2612 c->prefetch = prefetch_mmx2; 2612 c->prefetch = prefetch_mmx2;
2613 2613
2614 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; 2614 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
2615 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; 2615 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
2616 2616
2714 2714
2715 if (ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER) 2715 if (ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER)
2716 ff_vc1dsp_init_mmx(c, avctx); 2716 ff_vc1dsp_init_mmx(c, avctx);
2717 2717
2718 c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2; 2718 c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
2719 } else if (mm_flags & MM_3DNOW) { 2719 } else if (mm_flags & FF_MM_3DNOW) {
2720 c->prefetch = prefetch_3dnow; 2720 c->prefetch = prefetch_3dnow;
2721 2721
2722 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; 2722 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
2723 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; 2723 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
2724 2724
2772 #define H264_QPEL_FUNCS(x, y, CPU)\ 2772 #define H264_QPEL_FUNCS(x, y, CPU)\
2773 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ 2773 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\
2774 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\ 2774 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
2775 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\ 2775 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
2776 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; 2776 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
2777 if((mm_flags & MM_SSE2) && !(mm_flags & MM_3DNOW)){ 2777 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){
2778 // these functions are slower than mmx on AMD, but faster on Intel 2778 // these functions are slower than mmx on AMD, but faster on Intel
2779 /* FIXME works in most codecs, but crashes svq1 due to unaligned chroma 2779 /* FIXME works in most codecs, but crashes svq1 due to unaligned chroma
2780 c->put_pixels_tab[0][0] = put_pixels16_sse2; 2780 c->put_pixels_tab[0][0] = put_pixels16_sse2;
2781 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; 2781 c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
2782 */ 2782 */
2783 H264_QPEL_FUNCS(0, 0, sse2); 2783 H264_QPEL_FUNCS(0, 0, sse2);
2784 } 2784 }
2785 if(mm_flags & MM_SSE2){ 2785 if(mm_flags & FF_MM_SSE2){
2786 H264_QPEL_FUNCS(0, 1, sse2); 2786 H264_QPEL_FUNCS(0, 1, sse2);
2787 H264_QPEL_FUNCS(0, 2, sse2); 2787 H264_QPEL_FUNCS(0, 2, sse2);
2788 H264_QPEL_FUNCS(0, 3, sse2); 2788 H264_QPEL_FUNCS(0, 3, sse2);
2789 H264_QPEL_FUNCS(1, 1, sse2); 2789 H264_QPEL_FUNCS(1, 1, sse2);
2790 H264_QPEL_FUNCS(1, 2, sse2); 2790 H264_QPEL_FUNCS(1, 2, sse2);
2795 H264_QPEL_FUNCS(3, 1, sse2); 2795 H264_QPEL_FUNCS(3, 1, sse2);
2796 H264_QPEL_FUNCS(3, 2, sse2); 2796 H264_QPEL_FUNCS(3, 2, sse2);
2797 H264_QPEL_FUNCS(3, 3, sse2); 2797 H264_QPEL_FUNCS(3, 3, sse2);
2798 } 2798 }
2799 #ifdef HAVE_SSSE3 2799 #ifdef HAVE_SSSE3
2800 if(mm_flags & MM_SSSE3){ 2800 if(mm_flags & FF_MM_SSSE3){
2801 H264_QPEL_FUNCS(1, 0, ssse3); 2801 H264_QPEL_FUNCS(1, 0, ssse3);
2802 H264_QPEL_FUNCS(1, 1, ssse3); 2802 H264_QPEL_FUNCS(1, 1, ssse3);
2803 H264_QPEL_FUNCS(1, 2, ssse3); 2803 H264_QPEL_FUNCS(1, 2, ssse3);
2804 H264_QPEL_FUNCS(1, 3, ssse3); 2804 H264_QPEL_FUNCS(1, 3, ssse3);
2805 H264_QPEL_FUNCS(2, 0, ssse3); 2805 H264_QPEL_FUNCS(2, 0, ssse3);
2818 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; 2818 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
2819 } 2819 }
2820 #endif 2820 #endif
2821 2821
2822 #ifdef CONFIG_SNOW_DECODER 2822 #ifdef CONFIG_SNOW_DECODER
2823 if(mm_flags & MM_SSE2 & 0){ 2823 if(mm_flags & FF_MM_SSE2 & 0){
2824 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; 2824 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
2825 #ifdef HAVE_7REGS 2825 #ifdef HAVE_7REGS
2826 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; 2826 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
2827 #endif 2827 #endif
2828 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; 2828 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
2829 } 2829 }
2830 else{ 2830 else{
2831 if(mm_flags & MM_MMXEXT){ 2831 if(mm_flags & FF_MM_MMXEXT){
2832 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; 2832 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
2833 #ifdef HAVE_7REGS 2833 #ifdef HAVE_7REGS
2834 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; 2834 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
2835 #endif 2835 #endif
2836 } 2836 }
2837 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; 2837 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
2838 } 2838 }
2839 #endif 2839 #endif
2840 2840
2841 if(mm_flags & MM_3DNOW){ 2841 if(mm_flags & FF_MM_3DNOW){
2842 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; 2842 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
2843 c->vector_fmul = vector_fmul_3dnow; 2843 c->vector_fmul = vector_fmul_3dnow;
2844 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 2844 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2845 c->float_to_int16 = float_to_int16_3dnow; 2845 c->float_to_int16 = float_to_int16_3dnow;
2846 c->float_to_int16_interleave = float_to_int16_interleave_3dnow; 2846 c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
2847 } 2847 }
2848 } 2848 }
2849 if(mm_flags & MM_3DNOWEXT){ 2849 if(mm_flags & FF_MM_3DNOWEXT){
2850 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; 2850 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
2851 c->vector_fmul_window = vector_fmul_window_3dnow2; 2851 c->vector_fmul_window = vector_fmul_window_3dnow2;
2852 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 2852 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2853 c->float_to_int16_interleave = float_to_int16_interleave_3dn2; 2853 c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
2854 } 2854 }
2855 } 2855 }
2856 if(mm_flags & MM_SSE){ 2856 if(mm_flags & FF_MM_SSE){
2857 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; 2857 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
2858 c->ac3_downmix = ac3_downmix_sse; 2858 c->ac3_downmix = ac3_downmix_sse;
2859 c->vector_fmul = vector_fmul_sse; 2859 c->vector_fmul = vector_fmul_sse;
2860 c->vector_fmul_reverse = vector_fmul_reverse_sse; 2860 c->vector_fmul_reverse = vector_fmul_reverse_sse;
2861 c->vector_fmul_add_add = vector_fmul_add_add_sse; 2861 c->vector_fmul_add_add = vector_fmul_add_add_sse;
2862 c->vector_fmul_window = vector_fmul_window_sse; 2862 c->vector_fmul_window = vector_fmul_window_sse;
2863 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; 2863 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
2864 c->float_to_int16 = float_to_int16_sse; 2864 c->float_to_int16 = float_to_int16_sse;
2865 c->float_to_int16_interleave = float_to_int16_interleave_sse; 2865 c->float_to_int16_interleave = float_to_int16_interleave_sse;
2866 } 2866 }
2867 if(mm_flags & MM_3DNOW) 2867 if(mm_flags & FF_MM_3DNOW)
2868 c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse 2868 c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse
2869 if(mm_flags & MM_SSE2){ 2869 if(mm_flags & FF_MM_SSE2){
2870 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2; 2870 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;
2871 c->float_to_int16 = float_to_int16_sse2; 2871 c->float_to_int16 = float_to_int16_sse2;
2872 c->float_to_int16_interleave = float_to_int16_interleave_sse2; 2872 c->float_to_int16_interleave = float_to_int16_interleave_sse2;
2873 c->add_int16 = add_int16_sse2; 2873 c->add_int16 = add_int16_sse2;
2874 c->sub_int16 = sub_int16_sse2; 2874 c->sub_int16 = sub_int16_sse2;