comparison x86/dsputil_mmx.c @ 8760:31138c296ac6 libavcodec

ff_add_hfyu_median_prediction_mmx2: overall ffvhuff decoding speedup of 28% on core2, 25% on k8.
author lorenm
date Sun, 08 Feb 2009 17:45:30 +0000
parents 191860960b23
children a5c8210814d7
comparison
equal deleted inserted replaced
8759:4cea2f47219a 8760:31138c296ac6
545 : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15) 545 : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
546 ); 546 );
547 for(; i<w; i++) 547 for(; i<w; i++)
548 dst[i] = src1[i] + src2[i]; 548 dst[i] = src1[i] + src2[i];
549 } 549 }
550
551 #if HAVE_7REGS
/**
 * Median-predict one row of bytes and add the residual, using CMOV.
 *
 * For each index i the asm computes
 *     pred   = median(l, top[i], (l + top[i] - tl) & 0xff)
 *     dst[i] = l = (pred + diff[i]) & 0xff
 *     tl     = top[i]
 * where l and tl start from the low bytes of *left and *left_top.
 *
 * @param dst      output row; w bytes are written
 * @param top      row read alongside dst, one byte per output byte
 * @param diff     residual bytes added to the prediction
 * @param w        number of bytes to process
 * @param left     in: initial "left" value (low byte used); out: last byte written
 * @param left_top in: initial "top-left" value (low byte used); out: last top byte read
 *
 * NOTE(review): the loop tests its counter only after the body has run, so
 * w <= 0 still performs one iteration at index w -- presumably callers always
 * pass w >= 1; confirm.
 * NOTE(review): the asm stores through operand %5 but lists no "memory"
 * clobber -- confirm this is intentional.
 */
552 static void add_hfyu_median_prediction_cmov(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top) {
553 x86_reg w2 = -w; /* negative index counting up to 0; pointers below are biased by +w */
554 x86_reg x; /* scratch: holds top+w for addressing, then the gradient candidate */
555 int l = *left & 0xff; /* keep only the low byte so 32-bit compares see 0..255 */
556 int tl = *left_top & 0xff;
557 int t;
558 __asm__ volatile(
559 "mov %7, %3 \n" /* x = top+w */
560 "1: \n"
561 "movzx (%3,%4), %2 \n" /* t = top[i], zero-extended */
562 "mov %2, %k3 \n" /* x = t (overwrites the pointer held in x) */
563 "sub %b1, %b3 \n" /* low byte of x -= tl */
564 "add %b0, %b3 \n" /* low byte of x += l  ->  x = (l + t - tl) & 0xff */
565 "mov %2, %1 \n" /* tl = t: current top becomes next iteration's top-left */
566 "cmp %0, %2 \n" /* flags for t - l */
567 "cmovg %0, %2 \n" /* if t > l: t = l      -> t = min(l, top) */
568 "cmovg %1, %0 \n" /* same flags: l = top  -> l = max(l, top) */
569 "cmp %k3, %0 \n" /* flags for l - x */
570 "cmovg %k3, %0 \n" /* l = min(max(l, top), x) */
571 "mov %7, %3 \n" /* reload x = top+w for the next iteration's load */
572 "cmp %2, %0 \n" /* flags for l - t */
573 "cmovl %2, %0 \n" /* l = max(l, min(l, top)): l is now the median of the three */
574 "add (%6,%4), %b0 \n" /* low byte of l += diff[i] (wraps modulo 256) */
575 "mov %b0, (%5,%4) \n" /* dst[i] = l */
576 "inc %4 \n" /* advance the index */
577 "jl 1b \n" /* repeat while the index is still negative */
578 :"+&q"(l), "+&q"(tl), "=&r"(t), "=&q"(x), "+&r"(w2) /* %0..%4; "q" because l, tl and x are accessed through their byte sub-registers */
579 :"r"(dst+w), "r"(diff+w), "rm"(top+w) /* %5..%7; %7 may stay in memory, leaving seven register operands */
580 );
581 *left = l; /* hand the running state back to the caller */
582 *left_top = tl;
583 }
584 #endif
550 585
551 #define H263_LOOP_FILTER \ 586 #define H263_LOOP_FILTER \
552 "pxor %%mm7, %%mm7 \n\t"\ 587 "pxor %%mm7, %%mm7 \n\t"\
553 "movq %0, %%mm0 \n\t"\ 588 "movq %0, %%mm0 \n\t"\
554 "movq %0, %%mm1 \n\t"\ 589 "movq %0, %%mm1 \n\t"\
2326 2361
2327 #if HAVE_YASM 2362 #if HAVE_YASM
2328 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); 2363 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
2329 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); 2364 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
2330 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); 2365 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
2366 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top);
2331 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); 2367 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
2332 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); 2368 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
2333 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); 2369 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
2334 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta); 2370 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
2335 #if ARCH_X86_32 2371 #if ARCH_X86_32
2758 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2; 2794 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
2759 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2; 2795 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
2760 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; 2796 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
2761 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; 2797 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
2762 2798
2799 #if HAVE_YASM
2800 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
2801 #endif
2802 #if HAVE_7REGS
2803 if( mm_flags&FF_MM_3DNOW )
2804 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
2805 #endif
2806
2763 if (CONFIG_CAVS_DECODER) 2807 if (CONFIG_CAVS_DECODER)
2764 ff_cavsdsp_init_mmx2(c, avctx); 2808 ff_cavsdsp_init_mmx2(c, avctx);
2765 2809
2766 if (CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER) 2810 if (CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER)
2767 ff_vc1dsp_init_mmx(c, avctx); 2811 ff_vc1dsp_init_mmx(c, avctx);