comparison i386/dsputil_mmx.c @ 8375:de2509cf3c44 libavcodec

H.264 IDCT functions that include the chroma, inter luma, and intra16 luma loops, thus avoiding the calling overhead. The new functions are not yet used.
author michael
date Thu, 18 Dec 2008 02:36:48 +0000
parents 08b0f63a91c5
children 60b6a780100b
comparison
equal deleted inserted replaced
8374:9000fd7c166e 8375:de2509cf3c44
2627 2627
2628 c->h264_idct_dc_add= 2628 c->h264_idct_dc_add=
2629 c->h264_idct_add= ff_h264_idct_add_mmx; 2629 c->h264_idct_add= ff_h264_idct_add_mmx;
2630 c->h264_idct8_dc_add= 2630 c->h264_idct8_dc_add=
2631 c->h264_idct8_add= ff_h264_idct8_add_mmx; 2631 c->h264_idct8_add= ff_h264_idct8_add_mmx;
2632 if (mm_flags & FF_MM_SSE2) 2632
2633 c->h264_idct8_add= ff_h264_idct8_add_sse2; 2633 c->h264_idct_add16 = ff_h264_idct_add16_mmx;
2634 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx;
2635 c->h264_idct_add8 = ff_h264_idct_add8_mmx;
2636 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
2634 2637
2635 if (mm_flags & FF_MM_MMXEXT) { 2638 if (mm_flags & FF_MM_MMXEXT) {
2636 c->prefetch = prefetch_mmx2; 2639 c->prefetch = prefetch_mmx2;
2637 2640
2638 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; 2641 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
2649 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; 2652 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
2650 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; 2653 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
2651 2654
2652 c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2; 2655 c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
2653 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2; 2656 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
2657 c->h264_idct_add16 = ff_h264_idct_add16_mmx2;
2658 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;
2659 c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
2660 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
2654 2661
2655 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 2662 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2656 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; 2663 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
2657 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; 2664 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
2658 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; 2665 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
2805 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; 2812 c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
2806 */ 2813 */
2807 H264_QPEL_FUNCS(0, 0, sse2); 2814 H264_QPEL_FUNCS(0, 0, sse2);
2808 } 2815 }
2809 if(mm_flags & FF_MM_SSE2){ 2816 if(mm_flags & FF_MM_SSE2){
2817 c->h264_idct8_add = ff_h264_idct8_add_sse2;
2818 c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
2819
2810 H264_QPEL_FUNCS(0, 1, sse2); 2820 H264_QPEL_FUNCS(0, 1, sse2);
2811 H264_QPEL_FUNCS(0, 2, sse2); 2821 H264_QPEL_FUNCS(0, 2, sse2);
2812 H264_QPEL_FUNCS(0, 3, sse2); 2822 H264_QPEL_FUNCS(0, 3, sse2);
2813 H264_QPEL_FUNCS(1, 1, sse2); 2823 H264_QPEL_FUNCS(1, 1, sse2);
2814 H264_QPEL_FUNCS(1, 2, sse2); 2824 H264_QPEL_FUNCS(1, 2, sse2);