Mercurial > libavcodec.hg
comparison x86/dsputil_mmx.c @ 10430:12c8175d6db5 libavcodec
SIMD add_hfyu_left_prediction.
2.2x faster than C on Conroe, 3.6x on Penryn.
4-6% faster huffyuv decoding when using left or plane prediction mode with YUV.
author | lorenm |
---|---|
date | Sun, 18 Oct 2009 20:10:10 +0000 |
parents | 02798c603744 |
children | 546b7ebeaf07 |
comparison
equal
deleted
inserted
replaced
10429:289dd8daf4ee | 10430:12c8175d6db5 |
---|---|
2383 #if HAVE_YASM | 2383 #if HAVE_YASM |
2384 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); | 2384 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); |
2385 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); | 2385 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); |
2386 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); | 2386 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); |
2387 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top); | 2387 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top); |
2388 int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, uint8_t *src, int w, int left); | |
2389 int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, uint8_t *src, int w, int left); | |
2388 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); | 2390 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); |
2389 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); | 2391 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); |
2390 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); | 2392 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); |
2391 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); | 2393 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); |
2392 #if ARCH_X86_32 | 2394 #if ARCH_X86_32 |
2949 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd; | 2951 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd; |
2950 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd; | 2952 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd; |
2951 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3; | 2953 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3; |
2952 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3; | 2954 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3; |
2953 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; | 2955 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; |
2956 #if HAVE_YASM | |
2957 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; | |
2958 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe | |
2959 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; | |
2960 #endif | |
2954 } | 2961 } |
2955 #endif | 2962 #endif |
2956 | 2963 |
2957 #if CONFIG_GPL && HAVE_YASM | 2964 #if CONFIG_GPL && HAVE_YASM |
2958 if (mm_flags & FF_MM_MMX2){ | 2965 if (mm_flags & FF_MM_MMX2){ |