comparison i386/dsputil_mmx.c @ 8104:0d108ec85620 libavcodec
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
Add missing one for FF_MM_ALTIVEC to avcodec.h.
Rename all occurrences of MM_* to the corresponding FF_MM_*.
author: rathann
date: Mon, 03 Nov 2008 18:08:00 +0000
parents: 915bc657348f
children: cf4d575b1982
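For context, here is a minimal sketch of the prefixed CPU-capability flags that this change switches dsputil_mmx.c over to. Only the FF_MM_* names come from the changeset; the bit values and comments below are illustrative assumptions, not copied from avcodec.h.

```c
/* Illustrative sketch -- bit values are assumptions, not taken from avcodec.h. */
#define FF_MM_MMX      0x0001  /* standard MMX */
#define FF_MM_MMXEXT   0x0002  /* SSE integer functions or AMD MMX extensions */
#define FF_MM_3DNOW    0x0004  /* AMD 3DNow! */
#define FF_MM_SSE      0x0008  /* SSE functions */
#define FF_MM_SSE2     0x0010  /* SSE2 functions */
#define FF_MM_3DNOWEXT 0x0020  /* AMD 3DNow! extensions */
#define FF_MM_SSSE3    0x0080  /* SSSE3 functions */
#define FF_MM_ALTIVEC  0x0001  /* AltiVec (PowerPC); newly added by this change,
                                  may reuse an x86 value since the two flag sets
                                  are never tested on the same architecture */
```

Call sites keep the same capability tests, only with the FF_ prefix, e.g. `if (mm_flags & FF_MM_MMXEXT)` in the hunks below.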
comparison
8103:069d7a8e2e75 | 8104:0d108ec85620 |
---|---|
2496 mm_flags &= ~(avctx->dsp_mask & 0xffff); | 2496 mm_flags &= ~(avctx->dsp_mask & 0xffff); |
2497 } | 2497 } |
2498 | 2498 |
2499 #if 0 | 2499 #if 0 |
2500 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:"); | 2500 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:"); |
2501 if (mm_flags & MM_MMX) | 2501 if (mm_flags & FF_MM_MMX) |
2502 av_log(avctx, AV_LOG_INFO, " mmx"); | 2502 av_log(avctx, AV_LOG_INFO, " mmx"); |
2503 if (mm_flags & MM_MMXEXT) | 2503 if (mm_flags & FF_MM_MMXEXT) |
2504 av_log(avctx, AV_LOG_INFO, " mmxext"); | 2504 av_log(avctx, AV_LOG_INFO, " mmxext"); |
2505 if (mm_flags & MM_3DNOW) | 2505 if (mm_flags & FF_MM_3DNOW) |
2506 av_log(avctx, AV_LOG_INFO, " 3dnow"); | 2506 av_log(avctx, AV_LOG_INFO, " 3dnow"); |
2507 if (mm_flags & MM_SSE) | 2507 if (mm_flags & FF_MM_SSE) |
2508 av_log(avctx, AV_LOG_INFO, " sse"); | 2508 av_log(avctx, AV_LOG_INFO, " sse"); |
2509 if (mm_flags & MM_SSE2) | 2509 if (mm_flags & FF_MM_SSE2) |
2510 av_log(avctx, AV_LOG_INFO, " sse2"); | 2510 av_log(avctx, AV_LOG_INFO, " sse2"); |
2511 av_log(avctx, AV_LOG_INFO, "\n"); | 2511 av_log(avctx, AV_LOG_INFO, "\n"); |
2512 #endif | 2512 #endif |
2513 | 2513 |
2514 if (mm_flags & MM_MMX) { | 2514 if (mm_flags & FF_MM_MMX) { |
2515 const int idct_algo= avctx->idct_algo; | 2515 const int idct_algo= avctx->idct_algo; |
2516 | 2516 |
2517 if(avctx->lowres==0){ | 2517 if(avctx->lowres==0){ |
2518 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ | 2518 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ |
2519 c->idct_put= ff_simple_idct_put_mmx; | 2519 c->idct_put= ff_simple_idct_put_mmx; |
2520 c->idct_add= ff_simple_idct_add_mmx; | 2520 c->idct_add= ff_simple_idct_add_mmx; |
2521 c->idct = ff_simple_idct_mmx; | 2521 c->idct = ff_simple_idct_mmx; |
2522 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; | 2522 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; |
2523 #ifdef CONFIG_GPL | 2523 #ifdef CONFIG_GPL |
2524 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ | 2524 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ |
2525 if(mm_flags & MM_MMXEXT){ | 2525 if(mm_flags & FF_MM_MMXEXT){ |
2526 c->idct_put= ff_libmpeg2mmx2_idct_put; | 2526 c->idct_put= ff_libmpeg2mmx2_idct_put; |
2527 c->idct_add= ff_libmpeg2mmx2_idct_add; | 2527 c->idct_add= ff_libmpeg2mmx2_idct_add; |
2528 c->idct = ff_mmxext_idct; | 2528 c->idct = ff_mmxext_idct; |
2529 }else{ | 2529 }else{ |
2530 c->idct_put= ff_libmpeg2mmx_idct_put; | 2530 c->idct_put= ff_libmpeg2mmx_idct_put; |
2533 } | 2533 } |
2534 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | 2534 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; |
2535 #endif | 2535 #endif |
2536 }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER) && | 2536 }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER) && |
2537 idct_algo==FF_IDCT_VP3){ | 2537 idct_algo==FF_IDCT_VP3){ |
2538 if(mm_flags & MM_SSE2){ | 2538 if(mm_flags & FF_MM_SSE2){ |
2539 c->idct_put= ff_vp3_idct_put_sse2; | 2539 c->idct_put= ff_vp3_idct_put_sse2; |
2540 c->idct_add= ff_vp3_idct_add_sse2; | 2540 c->idct_add= ff_vp3_idct_add_sse2; |
2541 c->idct = ff_vp3_idct_sse2; | 2541 c->idct = ff_vp3_idct_sse2; |
2542 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; | 2542 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; |
2543 }else{ | 2543 }else{ |
2547 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; | 2547 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; |
2548 } | 2548 } |
2549 }else if(idct_algo==FF_IDCT_CAVS){ | 2549 }else if(idct_algo==FF_IDCT_CAVS){ |
2550 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; | 2550 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; |
2551 }else if(idct_algo==FF_IDCT_XVIDMMX){ | 2551 }else if(idct_algo==FF_IDCT_XVIDMMX){ |
2552 if(mm_flags & MM_SSE2){ | 2552 if(mm_flags & FF_MM_SSE2){ |
2553 c->idct_put= ff_idct_xvid_sse2_put; | 2553 c->idct_put= ff_idct_xvid_sse2_put; |
2554 c->idct_add= ff_idct_xvid_sse2_add; | 2554 c->idct_add= ff_idct_xvid_sse2_add; |
2555 c->idct = ff_idct_xvid_sse2; | 2555 c->idct = ff_idct_xvid_sse2; |
2556 c->idct_permutation_type= FF_SSE2_IDCT_PERM; | 2556 c->idct_permutation_type= FF_SSE2_IDCT_PERM; |
2557 }else if(mm_flags & MM_MMXEXT){ | 2557 }else if(mm_flags & FF_MM_MMXEXT){ |
2558 c->idct_put= ff_idct_xvid_mmx2_put; | 2558 c->idct_put= ff_idct_xvid_mmx2_put; |
2559 c->idct_add= ff_idct_xvid_mmx2_add; | 2559 c->idct_add= ff_idct_xvid_mmx2_add; |
2560 c->idct = ff_idct_xvid_mmx2; | 2560 c->idct = ff_idct_xvid_mmx2; |
2561 }else{ | 2561 }else{ |
2562 c->idct_put= ff_idct_xvid_mmx_put; | 2562 c->idct_put= ff_idct_xvid_mmx_put; |
2603 | 2603 |
2604 c->h264_idct_dc_add= | 2604 c->h264_idct_dc_add= |
2605 c->h264_idct_add= ff_h264_idct_add_mmx; | 2605 c->h264_idct_add= ff_h264_idct_add_mmx; |
2606 c->h264_idct8_dc_add= | 2606 c->h264_idct8_dc_add= |
2607 c->h264_idct8_add= ff_h264_idct8_add_mmx; | 2607 c->h264_idct8_add= ff_h264_idct8_add_mmx; |
2608 if (mm_flags & MM_SSE2) | 2608 if (mm_flags & FF_MM_SSE2) |
2609 c->h264_idct8_add= ff_h264_idct8_add_sse2; | 2609 c->h264_idct8_add= ff_h264_idct8_add_sse2; |
2610 | 2610 |
2611 if (mm_flags & MM_MMXEXT) { | 2611 if (mm_flags & FF_MM_MMXEXT) { |
2612 c->prefetch = prefetch_mmx2; | 2612 c->prefetch = prefetch_mmx2; |
2613 | 2613 |
2614 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; | 2614 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
2615 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; | 2615 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; |
2616 | 2616 |
2714 | 2714 |
2715 if (ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER) | 2715 if (ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER) |
2716 ff_vc1dsp_init_mmx(c, avctx); | 2716 ff_vc1dsp_init_mmx(c, avctx); |
2717 | 2717 |
2718 c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2; | 2718 c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2; |
2719 } else if (mm_flags & MM_3DNOW) { | 2719 } else if (mm_flags & FF_MM_3DNOW) { |
2720 c->prefetch = prefetch_3dnow; | 2720 c->prefetch = prefetch_3dnow; |
2721 | 2721 |
2722 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; | 2722 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; |
2723 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; | 2723 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; |
2724 | 2724 |
2772 #define H264_QPEL_FUNCS(x, y, CPU)\ | 2772 #define H264_QPEL_FUNCS(x, y, CPU)\ |
2773 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ | 2773 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ |
2774 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\ | 2774 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\ |
2775 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\ | 2775 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\ |
2776 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; | 2776 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; |
2777 if((mm_flags & MM_SSE2) && !(mm_flags & MM_3DNOW)){ | 2777 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){ |
2778 // these functions are slower than mmx on AMD, but faster on Intel | 2778 // these functions are slower than mmx on AMD, but faster on Intel |
2779 /* FIXME works in most codecs, but crashes svq1 due to unaligned chroma | 2779 /* FIXME works in most codecs, but crashes svq1 due to unaligned chroma |
2780 c->put_pixels_tab[0][0] = put_pixels16_sse2; | 2780 c->put_pixels_tab[0][0] = put_pixels16_sse2; |
2781 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; | 2781 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; |
2782 */ | 2782 */ |
2783 H264_QPEL_FUNCS(0, 0, sse2); | 2783 H264_QPEL_FUNCS(0, 0, sse2); |
2784 } | 2784 } |
2785 if(mm_flags & MM_SSE2){ | 2785 if(mm_flags & FF_MM_SSE2){ |
2786 H264_QPEL_FUNCS(0, 1, sse2); | 2786 H264_QPEL_FUNCS(0, 1, sse2); |
2787 H264_QPEL_FUNCS(0, 2, sse2); | 2787 H264_QPEL_FUNCS(0, 2, sse2); |
2788 H264_QPEL_FUNCS(0, 3, sse2); | 2788 H264_QPEL_FUNCS(0, 3, sse2); |
2789 H264_QPEL_FUNCS(1, 1, sse2); | 2789 H264_QPEL_FUNCS(1, 1, sse2); |
2790 H264_QPEL_FUNCS(1, 2, sse2); | 2790 H264_QPEL_FUNCS(1, 2, sse2); |
2795 H264_QPEL_FUNCS(3, 1, sse2); | 2795 H264_QPEL_FUNCS(3, 1, sse2); |
2796 H264_QPEL_FUNCS(3, 2, sse2); | 2796 H264_QPEL_FUNCS(3, 2, sse2); |
2797 H264_QPEL_FUNCS(3, 3, sse2); | 2797 H264_QPEL_FUNCS(3, 3, sse2); |
2798 } | 2798 } |
2799 #ifdef HAVE_SSSE3 | 2799 #ifdef HAVE_SSSE3 |
2800 if(mm_flags & MM_SSSE3){ | 2800 if(mm_flags & FF_MM_SSSE3){ |
2801 H264_QPEL_FUNCS(1, 0, ssse3); | 2801 H264_QPEL_FUNCS(1, 0, ssse3); |
2802 H264_QPEL_FUNCS(1, 1, ssse3); | 2802 H264_QPEL_FUNCS(1, 1, ssse3); |
2803 H264_QPEL_FUNCS(1, 2, ssse3); | 2803 H264_QPEL_FUNCS(1, 2, ssse3); |
2804 H264_QPEL_FUNCS(1, 3, ssse3); | 2804 H264_QPEL_FUNCS(1, 3, ssse3); |
2805 H264_QPEL_FUNCS(2, 0, ssse3); | 2805 H264_QPEL_FUNCS(2, 0, ssse3); |
2818 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; | 2818 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; |
2819 } | 2819 } |
2820 #endif | 2820 #endif |
2821 | 2821 |
2822 #ifdef CONFIG_SNOW_DECODER | 2822 #ifdef CONFIG_SNOW_DECODER |
2823 if(mm_flags & MM_SSE2 & 0){ | 2823 if(mm_flags & FF_MM_SSE2 & 0){ |
2824 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; | 2824 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; |
2825 #ifdef HAVE_7REGS | 2825 #ifdef HAVE_7REGS |
2826 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; | 2826 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; |
2827 #endif | 2827 #endif |
2828 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; | 2828 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; |
2829 } | 2829 } |
2830 else{ | 2830 else{ |
2831 if(mm_flags & MM_MMXEXT){ | 2831 if(mm_flags & FF_MM_MMXEXT){ |
2832 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; | 2832 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; |
2833 #ifdef HAVE_7REGS | 2833 #ifdef HAVE_7REGS |
2834 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; | 2834 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; |
2835 #endif | 2835 #endif |
2836 } | 2836 } |
2837 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; | 2837 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; |
2838 } | 2838 } |
2839 #endif | 2839 #endif |
2840 | 2840 |
2841 if(mm_flags & MM_3DNOW){ | 2841 if(mm_flags & FF_MM_3DNOW){ |
2842 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; | 2842 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; |
2843 c->vector_fmul = vector_fmul_3dnow; | 2843 c->vector_fmul = vector_fmul_3dnow; |
2844 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 2844 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
2845 c->float_to_int16 = float_to_int16_3dnow; | 2845 c->float_to_int16 = float_to_int16_3dnow; |
2846 c->float_to_int16_interleave = float_to_int16_interleave_3dnow; | 2846 c->float_to_int16_interleave = float_to_int16_interleave_3dnow; |
2847 } | 2847 } |
2848 } | 2848 } |
2849 if(mm_flags & MM_3DNOWEXT){ | 2849 if(mm_flags & FF_MM_3DNOWEXT){ |
2850 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; | 2850 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; |
2851 c->vector_fmul_window = vector_fmul_window_3dnow2; | 2851 c->vector_fmul_window = vector_fmul_window_3dnow2; |
2852 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 2852 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
2853 c->float_to_int16_interleave = float_to_int16_interleave_3dn2; | 2853 c->float_to_int16_interleave = float_to_int16_interleave_3dn2; |
2854 } | 2854 } |
2855 } | 2855 } |
2856 if(mm_flags & MM_SSE){ | 2856 if(mm_flags & FF_MM_SSE){ |
2857 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; | 2857 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; |
2858 c->ac3_downmix = ac3_downmix_sse; | 2858 c->ac3_downmix = ac3_downmix_sse; |
2859 c->vector_fmul = vector_fmul_sse; | 2859 c->vector_fmul = vector_fmul_sse; |
2860 c->vector_fmul_reverse = vector_fmul_reverse_sse; | 2860 c->vector_fmul_reverse = vector_fmul_reverse_sse; |
2861 c->vector_fmul_add_add = vector_fmul_add_add_sse; | 2861 c->vector_fmul_add_add = vector_fmul_add_add_sse; |
2862 c->vector_fmul_window = vector_fmul_window_sse; | 2862 c->vector_fmul_window = vector_fmul_window_sse; |
2863 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; | 2863 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; |
2864 c->float_to_int16 = float_to_int16_sse; | 2864 c->float_to_int16 = float_to_int16_sse; |
2865 c->float_to_int16_interleave = float_to_int16_interleave_sse; | 2865 c->float_to_int16_interleave = float_to_int16_interleave_sse; |
2866 } | 2866 } |
2867 if(mm_flags & MM_3DNOW) | 2867 if(mm_flags & FF_MM_3DNOW) |
2868 c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse | 2868 c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse |
2869 if(mm_flags & MM_SSE2){ | 2869 if(mm_flags & FF_MM_SSE2){ |
2870 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2; | 2870 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2; |
2871 c->float_to_int16 = float_to_int16_sse2; | 2871 c->float_to_int16 = float_to_int16_sse2; |
2872 c->float_to_int16_interleave = float_to_int16_interleave_sse2; | 2872 c->float_to_int16_interleave = float_to_int16_interleave_sse2; |
2873 c->add_int16 = add_int16_sse2; | 2873 c->add_int16 = add_int16_sse2; |
2874 c->sub_int16 = sub_int16_sse2; | 2874 c->sub_int16 = sub_int16_sse2; |