Mercurial > libavcodec.hg
comparison x86/dsputil_mmx.c @ 8590:7a463923ecd1 libavcodec
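Change the semantics of CONFIG_*, HAVE_* and ARCH_*.
They are now always defined to either 0 or 1, so they must be tested with `#if` (value test) rather than `#ifdef`/`defined()` (existence test).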
author | aurel |
---|---|
date | Tue, 13 Jan 2009 23:44:16 +0000 |
parents | f8bf438c6000 |
children | 68e959302527 |
comparison
equal
deleted
inserted
replaced
8589:a29b5b5c3c9d | 8590:7a463923ecd1 |
---|---|
878 "pabsw %%mm3, %%mm3 \n"\ | 878 "pabsw %%mm3, %%mm3 \n"\ |
879 "pabsw %%mm4, %%mm4 \n"\ | 879 "pabsw %%mm4, %%mm4 \n"\ |
880 "pabsw %%mm5, %%mm5 \n" | 880 "pabsw %%mm5, %%mm5 \n" |
881 | 881 |
882 PAETH(mmx2, ABS3_MMX2) | 882 PAETH(mmx2, ABS3_MMX2) |
883 #ifdef HAVE_SSSE3 | 883 #if HAVE_SSSE3 |
884 PAETH(ssse3, ABS3_SSSE3) | 884 PAETH(ssse3, ABS3_SSSE3) |
885 #endif | 885 #endif |
886 | 886 |
887 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ | 887 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ |
888 "paddw " #m4 ", " #m3 " \n\t" /* x1 */\ | 888 "paddw " #m4 ", " #m3 " \n\t" /* x1 */\ |
1763 void ff_mmx_idct(DCTELEM *block); | 1763 void ff_mmx_idct(DCTELEM *block); |
1764 void ff_mmxext_idct(DCTELEM *block); | 1764 void ff_mmxext_idct(DCTELEM *block); |
1765 | 1765 |
1766 /* XXX: those functions should be suppressed ASAP when all IDCTs are | 1766 /* XXX: those functions should be suppressed ASAP when all IDCTs are |
1767 converted */ | 1767 converted */ |
1768 #ifdef CONFIG_GPL | 1768 #if CONFIG_GPL |
1769 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | 1769 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
1770 { | 1770 { |
1771 ff_mmx_idct (block); | 1771 ff_mmx_idct (block); |
1772 put_pixels_clamped_mmx(block, dest, line_size); | 1772 put_pixels_clamped_mmx(block, dest, line_size); |
1773 } | 1773 } |
2145 ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step); | 2145 ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step); |
2146 } | 2146 } |
2147 | 2147 |
2148 static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1, | 2148 static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1, |
2149 const float *win, float add_bias, int len){ | 2149 const float *win, float add_bias, int len){ |
2150 #ifdef HAVE_6REGS | 2150 #if HAVE_6REGS |
2151 if(add_bias == 0){ | 2151 if(add_bias == 0){ |
2152 x86_reg i = -len*4; | 2152 x86_reg i = -len*4; |
2153 x86_reg j = len*4-8; | 2153 x86_reg j = len*4-8; |
2154 __asm__ volatile( | 2154 __asm__ volatile( |
2155 "1: \n" | 2155 "1: \n" |
2180 ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len); | 2180 ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len); |
2181 } | 2181 } |
2182 | 2182 |
2183 static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1, | 2183 static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1, |
2184 const float *win, float add_bias, int len){ | 2184 const float *win, float add_bias, int len){ |
2185 #ifdef HAVE_6REGS | 2185 #if HAVE_6REGS |
2186 if(add_bias == 0){ | 2186 if(add_bias == 0){ |
2187 x86_reg i = -len*4; | 2187 x86_reg i = -len*4; |
2188 x86_reg j = len*4-16; | 2188 x86_reg j = len*4-16; |
2189 __asm__ volatile( | 2189 __asm__ volatile( |
2190 "1: \n" | 2190 "1: \n" |
2322 " js 1b \n\t" | 2322 " js 1b \n\t" |
2323 :"+r"(reglen), "+r"(dst), "+r"(src) | 2323 :"+r"(reglen), "+r"(dst), "+r"(src) |
2324 ); | 2324 ); |
2325 } | 2325 } |
2326 | 2326 |
2327 #ifdef HAVE_YASM | 2327 #if HAVE_YASM |
2328 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); | 2328 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); |
2329 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); | 2329 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); |
2330 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); | 2330 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); |
2331 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); | 2331 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); |
2332 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); | 2332 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); |
2333 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); | 2333 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); |
2334 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); | 2334 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); |
2335 #ifdef ARCH_X86_32 | 2335 #if ARCH_X86_32 |
2336 static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta) | 2336 static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta) |
2337 { | 2337 { |
2338 ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta); | 2338 ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta); |
2339 ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta); | 2339 ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta); |
2340 } | 2340 } |
2553 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ | 2553 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ |
2554 c->idct_put= ff_simple_idct_put_mmx; | 2554 c->idct_put= ff_simple_idct_put_mmx; |
2555 c->idct_add= ff_simple_idct_add_mmx; | 2555 c->idct_add= ff_simple_idct_add_mmx; |
2556 c->idct = ff_simple_idct_mmx; | 2556 c->idct = ff_simple_idct_mmx; |
2557 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; | 2557 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; |
2558 #ifdef CONFIG_GPL | 2558 #if CONFIG_GPL |
2559 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ | 2559 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ |
2560 if(mm_flags & FF_MM_MMXEXT){ | 2560 if(mm_flags & FF_MM_MMXEXT){ |
2561 c->idct_put= ff_libmpeg2mmx2_idct_put; | 2561 c->idct_put= ff_libmpeg2mmx2_idct_put; |
2562 c->idct_add= ff_libmpeg2mmx2_idct_add; | 2562 c->idct_add= ff_libmpeg2mmx2_idct_add; |
2563 c->idct = ff_mmxext_idct; | 2563 c->idct = ff_mmxext_idct; |
2851 H264_QPEL_FUNCS(2, 3, sse2); | 2851 H264_QPEL_FUNCS(2, 3, sse2); |
2852 H264_QPEL_FUNCS(3, 1, sse2); | 2852 H264_QPEL_FUNCS(3, 1, sse2); |
2853 H264_QPEL_FUNCS(3, 2, sse2); | 2853 H264_QPEL_FUNCS(3, 2, sse2); |
2854 H264_QPEL_FUNCS(3, 3, sse2); | 2854 H264_QPEL_FUNCS(3, 3, sse2); |
2855 } | 2855 } |
2856 #ifdef HAVE_SSSE3 | 2856 #if HAVE_SSSE3 |
2857 if(mm_flags & FF_MM_SSSE3){ | 2857 if(mm_flags & FF_MM_SSSE3){ |
2858 H264_QPEL_FUNCS(1, 0, ssse3); | 2858 H264_QPEL_FUNCS(1, 0, ssse3); |
2859 H264_QPEL_FUNCS(1, 1, ssse3); | 2859 H264_QPEL_FUNCS(1, 1, ssse3); |
2860 H264_QPEL_FUNCS(1, 2, ssse3); | 2860 H264_QPEL_FUNCS(1, 2, ssse3); |
2861 H264_QPEL_FUNCS(1, 3, ssse3); | 2861 H264_QPEL_FUNCS(1, 3, ssse3); |
2874 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3; | 2874 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3; |
2875 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; | 2875 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; |
2876 } | 2876 } |
2877 #endif | 2877 #endif |
2878 | 2878 |
2879 #if defined(CONFIG_GPL) && defined(HAVE_YASM) | 2879 #if CONFIG_GPL && HAVE_YASM |
2880 if( mm_flags&FF_MM_MMXEXT ){ | 2880 if( mm_flags&FF_MM_MMXEXT ){ |
2881 #ifdef ARCH_X86_32 | 2881 #if ARCH_X86_32 |
2882 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext; | 2882 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext; |
2883 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext; | 2883 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext; |
2884 #endif | 2884 #endif |
2885 if( mm_flags&FF_MM_SSE2 ){ | 2885 if( mm_flags&FF_MM_SSE2 ){ |
2886 #if defined(ARCH_X86_64) || !defined(__ICC) || __ICC > 1100 | 2886 #if ARCH_X86_64 || !defined(__ICC) || __ICC > 1100 |
2887 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2; | 2887 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2; |
2888 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2; | 2888 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2; |
2889 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2; | 2889 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2; |
2890 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2; | 2890 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2; |
2891 #endif | 2891 #endif |
2894 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2; | 2894 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2; |
2895 } | 2895 } |
2896 } | 2896 } |
2897 #endif | 2897 #endif |
2898 | 2898 |
2899 #ifdef CONFIG_SNOW_DECODER | 2899 #if CONFIG_SNOW_DECODER |
2900 if(mm_flags & FF_MM_SSE2 & 0){ | 2900 if(mm_flags & FF_MM_SSE2 & 0){ |
2901 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; | 2901 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; |
2902 #ifdef HAVE_7REGS | 2902 #if HAVE_7REGS |
2903 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; | 2903 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; |
2904 #endif | 2904 #endif |
2905 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; | 2905 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; |
2906 } | 2906 } |
2907 else{ | 2907 else{ |
2908 if(mm_flags & FF_MM_MMXEXT){ | 2908 if(mm_flags & FF_MM_MMXEXT){ |
2909 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; | 2909 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; |
2910 #ifdef HAVE_7REGS | 2910 #if HAVE_7REGS |
2911 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; | 2911 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; |
2912 #endif | 2912 #endif |
2913 } | 2913 } |
2914 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; | 2914 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; |
2915 } | 2915 } |