comparison h264.c @ 3215:06f98047ff26 libavcodec

prefetch pixels for future motion compensation. 2-5% faster h264.
author lorenm
date Thu, 23 Mar 2006 20:16:36 +0000
parents 5fb704618ec4
children cf233efbcece
comparison
equal deleted inserted replaced
3214:91f89a395b28 3215:06f98047ff26
2750 else 2750 else
2751 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, 2751 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2752 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); 2752 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
2753 } 2753 }
2754 2754
2755 static inline void prefetch_motion(H264Context *h, int list){
2756 /* Prefetch reference pixels for the estimated mv 4 macroblocks ahead;
2757 * offsets are tuned for 64-byte cache lines. Does nothing when the ref index read below is negative. */
2758 MpegEncContext * const s = &h->s;
2759 const int refn = h->ref_cache[list][scan8[0]]; /* ref index cached at scan8[0] for this list */
2760 if(refn >= 0){ /* negative index: no reference picture to prefetch from */
2761 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; /* cached mv x (>>2: presumably quarter-pel to full-pel, confirm) + 16*mb_x + 8 */
2762 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; /* cached mv y (same >>2 scaling) + 16*mb_y */
2763 uint8_t **src= h->ref_list[list][refn].data; /* plane pointers of the selected reference picture */
2764 int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64; /* row shifts by 4 per mb_x step, cycling every 4 macroblocks; +64 bytes ahead */
2765 s->dsp.prefetch(src[0]+off, s->linesize, 4); /* plane 0 (presumably luma); last arg presumably a row count - confirm against dsp.prefetch */
2766 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; /* halved coordinates; row shifts by 1 per mb_x step, cycling every 8 macroblocks */
2767 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); /* stride is the distance from plane 1 to plane 2, presumably so the 2 fetches hit the same offset in both planes */
2768 }
2769 }
2770
2755 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 2771 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2756 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), 2772 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2757 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), 2773 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2758 h264_weight_func *weight_op, h264_biweight_func *weight_avg){ 2774 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2759 MpegEncContext * const s = &h->s; 2775 MpegEncContext * const s = &h->s;
2760 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; 2776 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2761 const int mb_type= s->current_picture.mb_type[mb_xy]; 2777 const int mb_type= s->current_picture.mb_type[mb_xy];
2762 2778
2763 assert(IS_INTER(mb_type)); 2779 assert(IS_INTER(mb_type));
2780
2781 prefetch_motion(h, 0);
2764 2782
2765 if(IS_16X16(mb_type)){ 2783 if(IS_16X16(mb_type)){
2766 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, 2784 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2767 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], 2785 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2768 &weight_op[0], &weight_avg[0], 2786 &weight_op[0], &weight_avg[0],
2831 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 2849 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2832 } 2850 }
2833 } 2851 }
2834 } 2852 }
2835 } 2853 }
2854
2855 prefetch_motion(h, 1);
2836 } 2856 }
2837 2857
2838 static void decode_init_vlc(H264Context *h){ 2858 static void decode_init_vlc(H264Context *h){
2839 static int done = 0; 2859 static int done = 0;
2840 2860