Mercurial > libavcodec.hg
comparison x86/dsputil_mmx.c @ 8760:31138c296ac6 libavcodec
ff_add_hfyu_median_prediction_mmx2
overall ffvhuff decoding speedup: 28% on core2, 25% on k8.
author | lorenm |
---|---|
date | Sun, 08 Feb 2009 17:45:30 +0000 |
parents | 191860960b23 |
children | a5c8210814d7 |
comparison
equal
deleted
inserted
replaced
8759:4cea2f47219a | 8760:31138c296ac6 |
---|---|
545 : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15) | 545 : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15) |
546 ); | 546 ); |
547 for(; i<w; i++) | 547 for(; i<w; i++) |
548 dst[i] = src1[i] + src2[i]; | 548 dst[i] = src1[i] + src2[i]; |
549 } | 549 } |
550 | |
551 #if HAVE_7REGS | |
552 static void add_hfyu_median_prediction_cmov(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top) { /* For each i in [0,w): dst[i] = (median(l, top[i], (l + top[i] - tl) mod 256) + diff[i]) mod 256, where l is the previously written byte and tl the previous top[] byte; l/tl are seeded from and written back to *left / *left_top. */ | |
553 x86_reg w2 = -w; /* negative index that counts up to 0; the pointer operands below are biased by +w to match */ | |
554 x86_reg x; /* scratch: holds top+w for addressing, then the gradient l + t - tl */ | |
555 int l = *left & 0xff; /* only the low byte of the incoming left value is used */ | |
556 int tl = *left_top & 0xff; /* likewise for the incoming top-left value */ | |
557 int t; /* scratch: current top byte, later min(l, t) */ | |
558 __asm__ volatile( /* operands: %0=l %1=tl %2=t %3=x %4=w2 %5=dst+w %6=diff+w %7=top+w */ | |
559 "mov %7, %3 \n" /* x = top+w (base pointer for the first load) */ | |
560 "1: \n" | |
561 "movzx (%3,%4), %2 \n" /* t = top[w+w2], zero-extended byte */ | |
562 "mov %2, %k3 \n" /* x = t (32-bit write, replaces the pointer held in x) */ | |
563 "sub %b1, %b3 \n" /* x.low8 -= tl */ | |
564 "add %b0, %b3 \n" /* x.low8 += l  ->  x = (l + t - tl) & 0xff */ | |
565 "mov %2, %1 \n" /* tl = t: this top byte is the top-left of the next iteration */ | |
566 "cmp %0, %2 \n" /* flags from t - l; both cmovg below use these same flags */ | |
567 "cmovg %0, %2 \n" /* if t > l: t = l          ->  %2 = min(l, t) */ | |
568 "cmovg %1, %0 \n" /* if t > l: l = tl (== old t)  ->  %0 = max(l, t) */ | |
569 "cmp %k3, %0 \n" /* flags from max(l, t) - x */ | |
570 "cmovg %k3, %0 \n" /* %0 = min(max(l, t), x) */ | |
571 "mov %7, %3 \n" /* reload x = top+w for the next load; mov leaves flags alone */ | |
572 "cmp %2, %0 \n" /* flags from %0 - min(l, t) */ | |
573 "cmovl %2, %0 \n" /* %0 = max(min(l, t), min(max(l, t), x)) = median of the three */ | |
574 "add (%6,%4), %b0 \n" /* l.low8 += diff[w+w2] (8-bit add, wraps mod 256) */ | |
575 "mov %b0, (%5,%4) \n" /* dst[w+w2] = low byte of l */ | |
576 "inc %4 \n" /* advance the negative index; sets the flags tested by jl */ | |
577 "jl 1b \n" /* loop while w2 < 0; the test follows the body, so the body runs once even if w <= 0 */ | |
578 :"+&q"(l), "+&q"(tl), "=&r"(t), "=&q"(x), "+&r"(w2) /* l, tl and x need "q" because their byte sub-registers (%b0, %b1, %b3) are used */ | |
579 :"r"(dst+w), "r"(diff+w), "rm"(top+w) /* top+w may live in memory since it is only ever copied into x */ | |
580 ); /* NOTE(review): no "memory" clobber is listed although the asm stores through %5 and loads through %3/%6 - confirm this is intentional */ | |
581 *left = l; /* hand the running left value (last byte written) back to the caller */ | |
582 *left_top = tl; /* hand back the last top[] byte read */ | |
583 } | |
584 #endif | |
550 | 585 |
551 #define H263_LOOP_FILTER \ | 586 #define H263_LOOP_FILTER \ |
552 "pxor %%mm7, %%mm7 \n\t"\ | 587 "pxor %%mm7, %%mm7 \n\t"\ |
553 "movq %0, %%mm0 \n\t"\ | 588 "movq %0, %%mm0 \n\t"\ |
554 "movq %0, %%mm1 \n\t"\ | 589 "movq %0, %%mm1 \n\t"\ |
2326 | 2361 |
2327 #if HAVE_YASM | 2362 #if HAVE_YASM |
2328 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); | 2363 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); |
2329 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); | 2364 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); |
2330 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); | 2365 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); |
2366 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top); | |
2331 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); | 2367 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); |
2332 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); | 2368 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); |
2333 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); | 2369 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); |
2334 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); | 2370 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); |
2335 #if ARCH_X86_32 | 2371 #if ARCH_X86_32 |
2758 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2; | 2794 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2; |
2759 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2; | 2795 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2; |
2760 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; | 2796 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; |
2761 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; | 2797 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; |
2762 | 2798 |
2799 #if HAVE_YASM | |
2800 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; | |
2801 #endif | |
2802 #if HAVE_7REGS | |
2803 if( mm_flags&FF_MM_3DNOW ) | |
2804 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; | |
2805 #endif | |
2806 | |
2763 if (CONFIG_CAVS_DECODER) | 2807 if (CONFIG_CAVS_DECODER) |
2764 ff_cavsdsp_init_mmx2(c, avctx); | 2808 ff_cavsdsp_init_mmx2(c, avctx); |
2765 | 2809 |
2766 if (CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER) | 2810 if (CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER) |
2767 ff_vc1dsp_init_mmx(c, avctx); | 2811 ff_vc1dsp_init_mmx(c, avctx); |