comparison i386/dsputil_mmx.c @ 3215:06f98047ff26 libavcodec

Prefetch pixels for future motion compensation; makes H.264 2-5% faster.
author lorenm
date Thu, 23 Mar 2006 20:16:36 +0000
parents 57d31bdbebe8
children 7aa9f80e7954
comparison
equal deleted inserted replaced
3214:91f89a395b28 3215:06f98047ff26
2487 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); 2487 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
2488 } 2488 }
2489 } 2489 }
2490 } 2490 }
2491 2491
/**
 * PREFETCH(name, op) defines a function `name` that issues the x86
 * instruction `op` once per row of a pixel block.
 *
 *   mem    - address of the first row to touch
 *   stride - byte distance from one row to the next
 *   h      - number of rows; when h <= 0 no instruction is issued
 *
 * The asm statement only names *p as an input memory operand; it has no
 * outputs, so the C code never reads or writes the pixels themselves.
 *
 * Two variants are generated: prefetch_mmx2 uses `prefetcht0` and
 * prefetch_3dnow uses `prefetch`.
 */
#define PREFETCH(name, op) \
void name(void *mem, int stride, int h){\
    const uint8_t *p= mem;\
    /* Test before decrementing: a do/while(--h) would run once for h==0 \
     * and then keep counting down through negative values. */\
    while(h-- > 0){\
        /* __asm__ (rather than asm) also builds in strict ISO C modes. */\
        __asm__ volatile(#op" %0" :: "m"(*p));\
        p+= stride;\
    }\
}
PREFETCH(prefetch_mmx2, prefetcht0)
PREFETCH(prefetch_3dnow, prefetch)
#undef PREFETCH
2503
2492 #include "h264dsp_mmx.c" 2504 #include "h264dsp_mmx.c"
2493 2505
2494 /* external functions, from idct_mmx.c */ 2506 /* external functions, from idct_mmx.c */
2495 void ff_mmx_idct(DCTELEM *block); 2507 void ff_mmx_idct(DCTELEM *block);
2496 void ff_mmxext_idct(DCTELEM *block); 2508 void ff_mmxext_idct(DCTELEM *block);
2747 c->h264_idct_add= ff_h264_idct_add_mmx; 2759 c->h264_idct_add= ff_h264_idct_add_mmx;
2748 c->h264_idct8_dc_add= 2760 c->h264_idct8_dc_add=
2749 c->h264_idct8_add= ff_h264_idct8_add_mmx; 2761 c->h264_idct8_add= ff_h264_idct8_add_mmx;
2750 2762
2751 if (mm_flags & MM_MMXEXT) { 2763 if (mm_flags & MM_MMXEXT) {
2764 c->prefetch = prefetch_mmx2;
2765
2752 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; 2766 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
2753 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; 2767 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
2754 2768
2755 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; 2769 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
2756 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; 2770 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
2877 2891
2878 #ifdef CONFIG_ENCODERS 2892 #ifdef CONFIG_ENCODERS
2879 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; 2893 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
2880 #endif //CONFIG_ENCODERS 2894 #endif //CONFIG_ENCODERS
2881 } else if (mm_flags & MM_3DNOW) { 2895 } else if (mm_flags & MM_3DNOW) {
2896 c->prefetch = prefetch_3dnow;
2897
2882 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; 2898 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
2883 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; 2899 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
2884 2900
2885 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; 2901 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
2886 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; 2902 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;