comparison x86/dsputil_mmx.c @ 12437:b242eb86ea9a libavcodec

Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1 FATE failures on Win64.
author rbultje
date Mon, 30 Aug 2010 16:31:04 +0000
parents d6d0a43848b4
children 51fc247eed32
comparison
equal deleted inserted replaced
12436:d6d0a43848b4 12437:b242eb86ea9a
1817 PREFETCH(prefetch_mmx2, prefetcht0) 1817 PREFETCH(prefetch_mmx2, prefetcht0)
1818 PREFETCH(prefetch_3dnow, prefetch) 1818 PREFETCH(prefetch_3dnow, prefetch)
1819 #undef PREFETCH 1819 #undef PREFETCH
1820 1820
1821 #include "h264dsp_mmx.c" 1821 #include "h264dsp_mmx.c"
1822 #include "rv40dsp_mmx.c" 1822
1823 void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src,
1824 int stride, int h, int x, int y);
1825 void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src,
1826 int stride, int h, int x, int y);
1827 void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src,
1828 int stride, int h, int x, int y);
1829 void ff_avg_h264_chroma_mc8_mmx2_rnd (uint8_t *dst, uint8_t *src,
1830 int stride, int h, int x, int y);
1831 void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
1832 int stride, int h, int x, int y);
1833 void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src,
1834 int stride, int h, int x, int y);
1835 void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src,
1836 int stride, int h, int x, int y);
1837 void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
1838 int stride, int h, int x, int y);
1839 void ff_avg_rv40_chroma_mc8_3dnow (uint8_t *dst, uint8_t *src,
1840 int stride, int h, int x, int y);
1841
1842 void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
1843 int stride, int h, int x, int y);
1844 void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
1845 int stride, int h, int x, int y);
1846 void ff_avg_h264_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src,
1847 int stride, int h, int x, int y);
1848 void ff_avg_rv40_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src,
1849 int stride, int h, int x, int y);
1850 void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
1851 int stride, int h, int x, int y);
1852 void ff_avg_rv40_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
1853 int stride, int h, int x, int y);
1854
1855 void ff_put_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src,
1856 int stride, int h, int x, int y);
1857 void ff_avg_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src,
1858 int stride, int h, int x, int y);
1859
1860 void ff_put_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
1861 int stride, int h, int x, int y);
1862 void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
1863 int stride, int h, int x, int y);
1864 void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
1865 int stride, int h, int x, int y);
1866
1867 void ff_avg_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
1868 int stride, int h, int x, int y);
1869 void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
1870 int stride, int h, int x, int y);
1871 void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
1872 int stride, int h, int x, int y);
1873
1823 1874
1824 /* CAVS specific */ 1875 /* CAVS specific */
1825 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1876 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1826 put_pixels8_mmx(dst, src, stride, 8); 1877 put_pixels8_mmx(dst, src, stride, 8);
1827 } 1878 }
2626 2677
2627 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { 2678 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
2628 c->h263_v_loop_filter= h263_v_loop_filter_mmx; 2679 c->h263_v_loop_filter= h263_v_loop_filter_mmx;
2629 c->h263_h_loop_filter= h263_h_loop_filter_mmx; 2680 c->h263_h_loop_filter= h263_h_loop_filter_mmx;
2630 } 2681 }
2631 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd; 2682
2632 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx; 2683 #if HAVE_YASM
2633 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_mmx_nornd; 2684 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
2634 2685 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
2635 c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx; 2686 c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
2636 c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx; 2687
2688 c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
2689 c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
2690 #endif
2637 2691
2638 if (mm_flags & FF_MM_MMX2) { 2692 if (mm_flags & FF_MM_MMX2) {
2639 c->prefetch = prefetch_mmx2; 2693 c->prefetch = prefetch_mmx2;
2640 2694
2641 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; 2695 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
2710 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2); 2764 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
2711 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2); 2765 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
2712 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2); 2766 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
2713 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2); 2767 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);
2714 2768
2715 c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_mmx2;
2716 c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_mmx2;
2717
2718 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_mmx2_nornd;
2719
2720 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd;
2721 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2;
2722 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2;
2723 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2;
2724
2725 #if HAVE_YASM 2769 #if HAVE_YASM
2770 c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
2771 c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
2772
2773 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
2774
2775 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
2776 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
2777 c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
2778 c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
2779
2726 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; 2780 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
2727 #endif 2781 #endif
2728 #if HAVE_7REGS && HAVE_TEN_OPERANDS 2782 #if HAVE_7REGS && HAVE_TEN_OPERANDS
2729 if( mm_flags&FF_MM_3DNOW ) 2783 if( mm_flags&FF_MM_3DNOW )
2730 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; 2784 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
2783 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow); 2837 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
2784 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow); 2838 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
2785 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow); 2839 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
2786 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow); 2840 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
2787 2841
2788 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow_rnd; 2842 #if HAVE_YASM
2789 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; 2843 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
2790 2844 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
2791 c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_3dnow; 2845
2792 c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_3dnow; 2846 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
2847
2848 c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
2849 c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
2850 #endif
2793 } 2851 }
2794 2852
2795 2853
2796 #define H264_QPEL_FUNCS(x, y, CPU)\ 2854 #define H264_QPEL_FUNCS(x, y, CPU)\
2797 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ 2855 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\
2830 H264_QPEL_FUNCS(2, 3, ssse3); 2888 H264_QPEL_FUNCS(2, 3, ssse3);
2831 H264_QPEL_FUNCS(3, 0, ssse3); 2889 H264_QPEL_FUNCS(3, 0, ssse3);
2832 H264_QPEL_FUNCS(3, 1, ssse3); 2890 H264_QPEL_FUNCS(3, 1, ssse3);
2833 H264_QPEL_FUNCS(3, 2, ssse3); 2891 H264_QPEL_FUNCS(3, 2, ssse3);
2834 H264_QPEL_FUNCS(3, 3, ssse3); 2892 H264_QPEL_FUNCS(3, 3, ssse3);
2835 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_ssse3_nornd;
2836 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_ssse3_nornd;
2837 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd;
2838 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd;
2839 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3;
2840 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3;
2841 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; 2893 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
2842 #if HAVE_YASM 2894 #if HAVE_YASM
2895 c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
2896 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
2897 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
2898 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
2899 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
2900 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
2843 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; 2901 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
2844 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe 2902 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe
2845 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; 2903 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
2846 #endif 2904 #endif
2847 } 2905 }