Mercurial > libavcodec.hg
comparison x86/dsputil_mmx.c @ 12437:b242eb86ea9a libavcodec
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
fate failures on Win64.
author | rbultje |
---|---|
date | Mon, 30 Aug 2010 16:31:04 +0000 |
parents | d6d0a43848b4 |
children | 51fc247eed32 |
comparison
equal
deleted
inserted
replaced
12436:d6d0a43848b4 | 12437:b242eb86ea9a |
---|---|
1817 PREFETCH(prefetch_mmx2, prefetcht0) | 1817 PREFETCH(prefetch_mmx2, prefetcht0) |
1818 PREFETCH(prefetch_3dnow, prefetch) | 1818 PREFETCH(prefetch_3dnow, prefetch) |
1819 #undef PREFETCH | 1819 #undef PREFETCH |
1820 | 1820 |
1821 #include "h264dsp_mmx.c" | 1821 #include "h264dsp_mmx.c" |
1822 #include "rv40dsp_mmx.c" | 1822 |
1823 void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src, | |
1824 int stride, int h, int x, int y); | |
1825 void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src, | |
1826 int stride, int h, int x, int y); | |
1827 void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, | |
1828 int stride, int h, int x, int y); | |
1829 void ff_avg_h264_chroma_mc8_mmx2_rnd (uint8_t *dst, uint8_t *src, | |
1830 int stride, int h, int x, int y); | |
1831 void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src, | |
1832 int stride, int h, int x, int y); | |
1833 void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src, | |
1834 int stride, int h, int x, int y); | |
1835 void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src, | |
1836 int stride, int h, int x, int y); | |
1837 void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src, | |
1838 int stride, int h, int x, int y); | |
1839 void ff_avg_rv40_chroma_mc8_3dnow (uint8_t *dst, uint8_t *src, | |
1840 int stride, int h, int x, int y); | |
1841 | |
1842 void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, | |
1843 int stride, int h, int x, int y); | |
1844 void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, | |
1845 int stride, int h, int x, int y); | |
1846 void ff_avg_h264_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src, | |
1847 int stride, int h, int x, int y); | |
1848 void ff_avg_rv40_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src, | |
1849 int stride, int h, int x, int y); | |
1850 void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src, | |
1851 int stride, int h, int x, int y); | |
1852 void ff_avg_rv40_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src, | |
1853 int stride, int h, int x, int y); | |
1854 | |
1855 void ff_put_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src, | |
1856 int stride, int h, int x, int y); | |
1857 void ff_avg_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src, | |
1858 int stride, int h, int x, int y); | |
1859 | |
1860 void ff_put_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src, | |
1861 int stride, int h, int x, int y); | |
1862 void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src, | |
1863 int stride, int h, int x, int y); | |
1864 void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, | |
1865 int stride, int h, int x, int y); | |
1866 | |
1867 void ff_avg_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src, | |
1868 int stride, int h, int x, int y); | |
1869 void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src, | |
1870 int stride, int h, int x, int y); | |
1871 void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, | |
1872 int stride, int h, int x, int y); | |
1873 | |
1823 | 1874 |
1824 /* CAVS specific */ | 1875 /* CAVS specific */ |
1825 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1876 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
1826 put_pixels8_mmx(dst, src, stride, 8); | 1877 put_pixels8_mmx(dst, src, stride, 8); |
1827 } | 1878 } |
2626 | 2677 |
2627 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { | 2678 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { |
2628 c->h263_v_loop_filter= h263_v_loop_filter_mmx; | 2679 c->h263_v_loop_filter= h263_v_loop_filter_mmx; |
2629 c->h263_h_loop_filter= h263_h_loop_filter_mmx; | 2680 c->h263_h_loop_filter= h263_h_loop_filter_mmx; |
2630 } | 2681 } |
2631 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd; | 2682 |
2632 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx; | 2683 #if HAVE_YASM |
2633 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_mmx_nornd; | 2684 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd; |
2634 | 2685 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx; |
2635 c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx; | 2686 c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd; |
2636 c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx; | 2687 |
2688 c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx; | |
2689 c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx; | |
2690 #endif | |
2637 | 2691 |
2638 if (mm_flags & FF_MM_MMX2) { | 2692 if (mm_flags & FF_MM_MMX2) { |
2639 c->prefetch = prefetch_mmx2; | 2693 c->prefetch = prefetch_mmx2; |
2640 | 2694 |
2641 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; | 2695 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
2710 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2); | 2764 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2); |
2711 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2); | 2765 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2); |
2712 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2); | 2766 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2); |
2713 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2); | 2767 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2); |
2714 | 2768 |
2715 c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_mmx2; | |
2716 c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_mmx2; | |
2717 | |
2718 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_mmx2_nornd; | |
2719 | |
2720 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd; | |
2721 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2; | |
2722 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2; | |
2723 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2; | |
2724 | |
2725 #if HAVE_YASM | 2769 #if HAVE_YASM |
2770 c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2; | |
2771 c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2; | |
2772 | |
2773 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd; | |
2774 | |
2775 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd; | |
2776 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2; | |
2777 c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2; | |
2778 c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2; | |
2779 | |
2726 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; | 2780 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; |
2727 #endif | 2781 #endif |
2728 #if HAVE_7REGS && HAVE_TEN_OPERANDS | 2782 #if HAVE_7REGS && HAVE_TEN_OPERANDS |
2729 if( mm_flags&FF_MM_3DNOW ) | 2783 if( mm_flags&FF_MM_3DNOW ) |
2730 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; | 2784 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; |
2783 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow); | 2837 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow); |
2784 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow); | 2838 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow); |
2785 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow); | 2839 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow); |
2786 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow); | 2840 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow); |
2787 | 2841 |
2788 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow_rnd; | 2842 #if HAVE_YASM |
2789 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; | 2843 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd; |
2790 | 2844 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow; |
2791 c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_3dnow; | 2845 |
2792 c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_3dnow; | 2846 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd; |
2847 | |
2848 c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow; | |
2849 c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow; | |
2850 #endif | |
2793 } | 2851 } |
2794 | 2852 |
2795 | 2853 |
2796 #define H264_QPEL_FUNCS(x, y, CPU)\ | 2854 #define H264_QPEL_FUNCS(x, y, CPU)\ |
2797 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ | 2855 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ |
2830 H264_QPEL_FUNCS(2, 3, ssse3); | 2888 H264_QPEL_FUNCS(2, 3, ssse3); |
2831 H264_QPEL_FUNCS(3, 0, ssse3); | 2889 H264_QPEL_FUNCS(3, 0, ssse3); |
2832 H264_QPEL_FUNCS(3, 1, ssse3); | 2890 H264_QPEL_FUNCS(3, 1, ssse3); |
2833 H264_QPEL_FUNCS(3, 2, ssse3); | 2891 H264_QPEL_FUNCS(3, 2, ssse3); |
2834 H264_QPEL_FUNCS(3, 3, ssse3); | 2892 H264_QPEL_FUNCS(3, 3, ssse3); |
2835 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_ssse3_nornd; | |
2836 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_ssse3_nornd; | |
2837 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd; | |
2838 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd; | |
2839 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3; | |
2840 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3; | |
2841 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; | 2893 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; |
2842 #if HAVE_YASM | 2894 #if HAVE_YASM |
2895 c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd; | |
2896 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd; | |
2897 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd; | |
2898 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd; | |
2899 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3; | |
2900 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3; | |
2843 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; | 2901 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; |
2844 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe | 2902 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe |
2845 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; | 2903 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; |
2846 #endif | 2904 #endif |
2847 } | 2905 } |