Mercurial > libavcodec.hg
comparison x86/dsputil_mmx.c @ 12450:3941687b4fa9 libavcodec
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) into h264_qpel_mmx.c,
which is still #included in dsputil_mmx.c and is part of DSPContext, and h264dsp_mmx.c,
which represents H264DSPContext and is now compiled on its own.
author | rbultje |
---|---|
date | Wed, 01 Sep 2010 20:48:59 +0000 |
parents | 51fc247eed32 |
children | f4355cd85faa |
comparison
equal
deleted
inserted
replaced
12449:3bca212d6f51 | 12450:3941687b4fa9 |
---|---|
722 "+m" (*(uint64_t*)(src + 0*stride)), | 722 "+m" (*(uint64_t*)(src + 0*stride)), |
723 "+m" (*(uint64_t*)(src + 1*stride)) | 723 "+m" (*(uint64_t*)(src + 1*stride)) |
724 : "g" (2*strength), "m"(ff_pb_FC) | 724 : "g" (2*strength), "m"(ff_pb_FC) |
725 ); | 725 ); |
726 } | 726 } |
727 } | |
728 | |
729 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ | |
730 __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... | |
731 "movd %4, %%mm0 \n\t" | |
732 "movd %5, %%mm1 \n\t" | |
733 "movd %6, %%mm2 \n\t" | |
734 "movd %7, %%mm3 \n\t" | |
735 "punpcklbw %%mm1, %%mm0 \n\t" | |
736 "punpcklbw %%mm3, %%mm2 \n\t" | |
737 "movq %%mm0, %%mm1 \n\t" | |
738 "punpcklwd %%mm2, %%mm0 \n\t" | |
739 "punpckhwd %%mm2, %%mm1 \n\t" | |
740 "movd %%mm0, %0 \n\t" | |
741 "punpckhdq %%mm0, %%mm0 \n\t" | |
742 "movd %%mm0, %1 \n\t" | |
743 "movd %%mm1, %2 \n\t" | |
744 "punpckhdq %%mm1, %%mm1 \n\t" | |
745 "movd %%mm1, %3 \n\t" | |
746 | |
747 : "=m" (*(uint32_t*)(dst + 0*dst_stride)), | |
748 "=m" (*(uint32_t*)(dst + 1*dst_stride)), | |
749 "=m" (*(uint32_t*)(dst + 2*dst_stride)), | |
750 "=m" (*(uint32_t*)(dst + 3*dst_stride)) | |
751 : "m" (*(uint32_t*)(src + 0*src_stride)), | |
752 "m" (*(uint32_t*)(src + 1*src_stride)), | |
753 "m" (*(uint32_t*)(src + 2*src_stride)), | |
754 "m" (*(uint32_t*)(src + 3*src_stride)) | |
755 ); | |
756 } | 727 } |
757 | 728 |
758 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ | 729 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ |
759 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { | 730 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { |
760 const int strength= ff_h263_loop_filter_strength[qscale]; | 731 const int strength= ff_h263_loop_filter_strength[qscale]; |
1816 } | 1787 } |
1817 PREFETCH(prefetch_mmx2, prefetcht0) | 1788 PREFETCH(prefetch_mmx2, prefetcht0) |
1818 PREFETCH(prefetch_3dnow, prefetch) | 1789 PREFETCH(prefetch_3dnow, prefetch) |
1819 #undef PREFETCH | 1790 #undef PREFETCH |
1820 | 1791 |
1821 #include "h264dsp_mmx.c" | 1792 #include "h264_qpel_mmx.c" |
1822 | 1793 |
1823 void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src, | 1794 void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src, |
1824 int stride, int h, int x, int y); | 1795 int stride, int h, int x, int y); |
1825 void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src, | 1796 void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src, |
1826 int stride, int h, int x, int y); | 1797 int stride, int h, int x, int y); |
2447 int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); | 2418 int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); |
2448 int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); | 2419 int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); |
2449 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top); | 2420 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top); |
2450 int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left); | 2421 int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left); |
2451 int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left); | 2422 int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left); |
2452 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); | 2423 |
2453 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); | 2424 #if !HAVE_YASM |
2454 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); | |
2455 void ff_x264_deblock_v_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta); | |
2456 void ff_x264_deblock_h_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta); | |
2457 | |
2458 #if HAVE_YASM && ARCH_X86_32 | |
2459 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); | |
2460 static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta) | |
2461 { | |
2462 ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta); | |
2463 ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta); | |
2464 } | |
2465 #elif !HAVE_YASM | |
2466 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6) | 2425 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6) |
2467 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) | 2426 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) |
2468 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) | 2427 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) |
2469 #endif | 2428 #endif |
2470 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse | 2429 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse |
2992 | 2951 |
2993 //av_fdct = just_return; | 2952 //av_fdct = just_return; |
2994 //ff_idct = just_return; | 2953 //ff_idct = just_return; |
2995 #endif | 2954 #endif |
2996 } | 2955 } |
2997 | |
2998 #if CONFIG_H264DSP | |
2999 void ff_h264dsp_init_x86(H264DSPContext *c) | |
3000 { | |
3001 int mm_flags = mm_support(); | |
3002 | |
3003 if (mm_flags & FF_MM_MMX) { | |
3004 c->h264_idct_dc_add= | |
3005 c->h264_idct_add= ff_h264_idct_add_mmx; | |
3006 c->h264_idct8_dc_add= | |
3007 c->h264_idct8_add= ff_h264_idct8_add_mmx; | |
3008 | |
3009 c->h264_idct_add16 = ff_h264_idct_add16_mmx; | |
3010 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx; | |
3011 c->h264_idct_add8 = ff_h264_idct_add8_mmx; | |
3012 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx; | |
3013 | |
3014 if (mm_flags & FF_MM_MMX2) { | |
3015 c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2; | |
3016 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2; | |
3017 c->h264_idct_add16 = ff_h264_idct_add16_mmx2; | |
3018 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2; | |
3019 c->h264_idct_add8 = ff_h264_idct_add8_mmx2; | |
3020 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2; | |
3021 | |
3022 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2; | |
3023 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2; | |
3024 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2; | |
3025 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2; | |
3026 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2; | |
3027 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2; | |
3028 c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; | |
3029 | |
3030 c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; | |
3031 c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; | |
3032 c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2; | |
3033 c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2; | |
3034 c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2; | |
3035 c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2; | |
3036 c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2; | |
3037 c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2; | |
3038 | |
3039 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2; | |
3040 c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2; | |
3041 c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2; | |
3042 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2; | |
3043 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2; | |
3044 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2; | |
3045 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; | |
3046 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; | |
3047 } | |
3048 if(mm_flags & FF_MM_SSE2){ | |
3049 c->h264_idct8_add = ff_h264_idct8_add_sse2; | |
3050 c->h264_idct8_add4= ff_h264_idct8_add4_sse2; | |
3051 } | |
3052 | |
3053 #if HAVE_YASM | |
3054 if (mm_flags & FF_MM_MMX2){ | |
3055 #if ARCH_X86_32 | |
3056 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext; | |
3057 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext; | |
3058 #endif | |
3059 if( mm_flags&FF_MM_SSE2 ){ | |
3060 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2; | |
3061 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2; | |
3062 #if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110 | |
3063 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2; | |
3064 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2; | |
3065 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2; | |
3066 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2; | |
3067 #endif | |
3068 #if CONFIG_GPL | |
3069 c->h264_idct_add16 = ff_h264_idct_add16_sse2; | |
3070 c->h264_idct_add8 = ff_h264_idct_add8_sse2; | |
3071 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2; | |
3072 #endif | |
3073 } | |
3074 if ( mm_flags&FF_MM_SSSE3 ){ | |
3075 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3; | |
3076 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3; | |
3077 } | |
3078 } | |
3079 #endif | |
3080 } | |
3081 } | |
3082 #endif /* CONFIG_H264DSP */ |