comparison x86/dsputil_mmx.c @ 8590:7a463923ecd1 libavcodec

Change semantics of CONFIG_*, HAVE_* and ARCH_*. They are now always defined to either 0 or 1.
author aurel
date Tue, 13 Jan 2009 23:44:16 +0000
parents f8bf438c6000
children 68e959302527
comparison
equal deleted inserted replaced
8589:a29b5b5c3c9d 8590:7a463923ecd1
878 "pabsw %%mm3, %%mm3 \n"\ 878 "pabsw %%mm3, %%mm3 \n"\
879 "pabsw %%mm4, %%mm4 \n"\ 879 "pabsw %%mm4, %%mm4 \n"\
880 "pabsw %%mm5, %%mm5 \n" 880 "pabsw %%mm5, %%mm5 \n"
881 881
882 PAETH(mmx2, ABS3_MMX2) 882 PAETH(mmx2, ABS3_MMX2)
883 #ifdef HAVE_SSSE3 883 #if HAVE_SSSE3
884 PAETH(ssse3, ABS3_SSSE3) 884 PAETH(ssse3, ABS3_SSSE3)
885 #endif 885 #endif
886 886
887 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ 887 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
888 "paddw " #m4 ", " #m3 " \n\t" /* x1 */\ 888 "paddw " #m4 ", " #m3 " \n\t" /* x1 */\
1763 void ff_mmx_idct(DCTELEM *block); 1763 void ff_mmx_idct(DCTELEM *block);
1764 void ff_mmxext_idct(DCTELEM *block); 1764 void ff_mmxext_idct(DCTELEM *block);
1765 1765
1766 /* XXX: those functions should be suppressed ASAP when all IDCTs are 1766 /* XXX: those functions should be suppressed ASAP when all IDCTs are
1767 converted */ 1767 converted */
1768 #ifdef CONFIG_GPL 1768 #if CONFIG_GPL
1769 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) 1769 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
1770 { 1770 {
1771 ff_mmx_idct (block); 1771 ff_mmx_idct (block);
1772 put_pixels_clamped_mmx(block, dest, line_size); 1772 put_pixels_clamped_mmx(block, dest, line_size);
1773 } 1773 }
2145 ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step); 2145 ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
2146 } 2146 }
2147 2147
2148 static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1, 2148 static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
2149 const float *win, float add_bias, int len){ 2149 const float *win, float add_bias, int len){
2150 #ifdef HAVE_6REGS 2150 #if HAVE_6REGS
2151 if(add_bias == 0){ 2151 if(add_bias == 0){
2152 x86_reg i = -len*4; 2152 x86_reg i = -len*4;
2153 x86_reg j = len*4-8; 2153 x86_reg j = len*4-8;
2154 __asm__ volatile( 2154 __asm__ volatile(
2155 "1: \n" 2155 "1: \n"
2180 ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len); 2180 ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
2181 } 2181 }
2182 2182
2183 static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1, 2183 static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1,
2184 const float *win, float add_bias, int len){ 2184 const float *win, float add_bias, int len){
2185 #ifdef HAVE_6REGS 2185 #if HAVE_6REGS
2186 if(add_bias == 0){ 2186 if(add_bias == 0){
2187 x86_reg i = -len*4; 2187 x86_reg i = -len*4;
2188 x86_reg j = len*4-16; 2188 x86_reg j = len*4-16;
2189 __asm__ volatile( 2189 __asm__ volatile(
2190 "1: \n" 2190 "1: \n"
2322 " js 1b \n\t" 2322 " js 1b \n\t"
2323 :"+r"(reglen), "+r"(dst), "+r"(src) 2323 :"+r"(reglen), "+r"(dst), "+r"(src)
2324 ); 2324 );
2325 } 2325 }
2326 2326
2327 #ifdef HAVE_YASM 2327 #if HAVE_YASM
2328 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); 2328 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
2329 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); 2329 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
2330 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); 2330 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
2331 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); 2331 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
2332 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); 2332 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
2333 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); 2333 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
2334 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); 2334 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
2335 #ifdef ARCH_X86_32 2335 #if ARCH_X86_32
2336 static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta) 2336 static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
2337 { 2337 {
2338 ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta); 2338 ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
2339 ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta); 2339 ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
2340 } 2340 }
2553 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ 2553 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
2554 c->idct_put= ff_simple_idct_put_mmx; 2554 c->idct_put= ff_simple_idct_put_mmx;
2555 c->idct_add= ff_simple_idct_add_mmx; 2555 c->idct_add= ff_simple_idct_add_mmx;
2556 c->idct = ff_simple_idct_mmx; 2556 c->idct = ff_simple_idct_mmx;
2557 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; 2557 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
2558 #ifdef CONFIG_GPL 2558 #if CONFIG_GPL
2559 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ 2559 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
2560 if(mm_flags & FF_MM_MMXEXT){ 2560 if(mm_flags & FF_MM_MMXEXT){
2561 c->idct_put= ff_libmpeg2mmx2_idct_put; 2561 c->idct_put= ff_libmpeg2mmx2_idct_put;
2562 c->idct_add= ff_libmpeg2mmx2_idct_add; 2562 c->idct_add= ff_libmpeg2mmx2_idct_add;
2563 c->idct = ff_mmxext_idct; 2563 c->idct = ff_mmxext_idct;
2851 H264_QPEL_FUNCS(2, 3, sse2); 2851 H264_QPEL_FUNCS(2, 3, sse2);
2852 H264_QPEL_FUNCS(3, 1, sse2); 2852 H264_QPEL_FUNCS(3, 1, sse2);
2853 H264_QPEL_FUNCS(3, 2, sse2); 2853 H264_QPEL_FUNCS(3, 2, sse2);
2854 H264_QPEL_FUNCS(3, 3, sse2); 2854 H264_QPEL_FUNCS(3, 3, sse2);
2855 } 2855 }
2856 #ifdef HAVE_SSSE3 2856 #if HAVE_SSSE3
2857 if(mm_flags & FF_MM_SSSE3){ 2857 if(mm_flags & FF_MM_SSSE3){
2858 H264_QPEL_FUNCS(1, 0, ssse3); 2858 H264_QPEL_FUNCS(1, 0, ssse3);
2859 H264_QPEL_FUNCS(1, 1, ssse3); 2859 H264_QPEL_FUNCS(1, 1, ssse3);
2860 H264_QPEL_FUNCS(1, 2, ssse3); 2860 H264_QPEL_FUNCS(1, 2, ssse3);
2861 H264_QPEL_FUNCS(1, 3, ssse3); 2861 H264_QPEL_FUNCS(1, 3, ssse3);
2874 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3; 2874 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3;
2875 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; 2875 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
2876 } 2876 }
2877 #endif 2877 #endif
2878 2878
2879 #if defined(CONFIG_GPL) && defined(HAVE_YASM) 2879 #if CONFIG_GPL && HAVE_YASM
2880 if( mm_flags&FF_MM_MMXEXT ){ 2880 if( mm_flags&FF_MM_MMXEXT ){
2881 #ifdef ARCH_X86_32 2881 #if ARCH_X86_32
2882 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext; 2882 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
2883 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext; 2883 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
2884 #endif 2884 #endif
2885 if( mm_flags&FF_MM_SSE2 ){ 2885 if( mm_flags&FF_MM_SSE2 ){
2886 #if defined(ARCH_X86_64) || !defined(__ICC) || __ICC > 1100 2886 #if ARCH_X86_64 || !defined(__ICC) || __ICC > 1100
2887 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2; 2887 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
2888 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2; 2888 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
2889 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2; 2889 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
2890 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2; 2890 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
2891 #endif 2891 #endif
2894 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2; 2894 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
2895 } 2895 }
2896 } 2896 }
2897 #endif 2897 #endif
2898 2898
2899 #ifdef CONFIG_SNOW_DECODER 2899 #if CONFIG_SNOW_DECODER
2900 if(mm_flags & FF_MM_SSE2 & 0){ 2900 if(mm_flags & FF_MM_SSE2 & 0){
2901 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; 2901 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
2902 #ifdef HAVE_7REGS 2902 #if HAVE_7REGS
2903 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; 2903 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
2904 #endif 2904 #endif
2905 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; 2905 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
2906 } 2906 }
2907 else{ 2907 else{
2908 if(mm_flags & FF_MM_MMXEXT){ 2908 if(mm_flags & FF_MM_MMXEXT){
2909 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; 2909 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
2910 #ifdef HAVE_7REGS 2910 #if HAVE_7REGS
2911 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; 2911 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
2912 #endif 2912 #endif
2913 } 2913 }
2914 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; 2914 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
2915 } 2915 }