comparison h264.c @ 3645:47821be55b6c libavcodec

MMX implementation of the deblocking strength decision; H.264 decoding is 2-3% faster.
author lorenm
date Mon, 28 Aug 2006 09:33:01 +0000
parents 5ef29058581e
children 2265410d1d74
comparison
equal deleted inserted replaced
3644:9eb3d84b6eef 3645:47821be55b6c
407 static VLC run7_vlc; 407 static VLC run7_vlc;
408 408
409 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); 409 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
410 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); 410 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
411 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); 411 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
412 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
412 413
413 static always_inline uint32_t pack16to32(int a, int b){ 414 static always_inline uint32_t pack16to32(int a, int b){
414 #ifdef WORDS_BIGENDIAN 415 #ifdef WORDS_BIGENDIAN
415 return (b&0xFFFF) + (a<<16); 416 return (b&0xFFFF) + (a<<16);
416 #else 417 #else
3877 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize); 3878 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3878 } else { 3879 } else {
3879 tprintf("call filter_mb\n"); 3880 tprintf("call filter_mb\n");
3880 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize); 3881 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3881 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb 3882 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3882 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); 3883 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3883 } 3884 }
3884 } 3885 }
3885 } 3886 }
3886 3887
3887 /** 3888 /**
6692 6693
6693 return 0; 6694 return 0;
6694 } 6695 }
6695 6696
6696 6697
6697 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { 6698 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6698 int i, d; 6699 int i, d;
6699 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); 6700 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6700 const int alpha = alpha_table[index_a]; 6701 const int alpha = alpha_table[index_a];
6701 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; 6702 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6702 6703
6753 } 6754 }
6754 pix += stride; 6755 pix += stride;
6755 } 6756 }
6756 } 6757 }
6757 } 6758 }
6758 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { 6759 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6759 int i; 6760 int i;
6760 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); 6761 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6761 const int alpha = alpha_table[index_a]; 6762 const int alpha = alpha_table[index_a];
6762 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; 6763 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6763 6764
6769 } else { 6770 } else {
6770 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); 6771 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
6771 } 6772 }
6772 } 6773 }
6773 6774
6774 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) { 6775 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6775 int i; 6776 int i;
6776 for( i = 0; i < 16; i++, pix += stride) { 6777 for( i = 0; i < 16; i++, pix += stride) {
6777 int index_a; 6778 int index_a;
6778 int alpha; 6779 int alpha;
6779 int beta; 6780 int beta;
6867 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); 6868 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6868 } 6869 }
6869 } 6870 }
6870 } 6871 }
6871 } 6872 }
6872 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) { 6873 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6873 int i; 6874 int i;
6874 for( i = 0; i < 8; i++, pix += stride) { 6875 for( i = 0; i < 8; i++, pix += stride) {
6875 int index_a; 6876 int index_a;
6876 int alpha; 6877 int alpha;
6877 int beta; 6878 int beta;
6920 } 6921 }
6921 } 6922 }
6922 } 6923 }
6923 } 6924 }
6924 6925
6925 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { 6926 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6926 int i, d; 6927 int i, d;
6927 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); 6928 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6928 const int alpha = alpha_table[index_a]; 6929 const int alpha = alpha_table[index_a];
6929 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; 6930 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6930 const int pix_next = stride; 6931 const int pix_next = stride;
6980 pix++; 6981 pix++;
6981 } 6982 }
6982 } 6983 }
6983 } 6984 }
6984 6985
6985 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { 6986 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6986 int i; 6987 int i;
6987 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); 6988 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6988 const int alpha = alpha_table[index_a]; 6989 const int alpha = alpha_table[index_a];
6989 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; 6990 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6990 6991
6993 for(i=0; i<4; i++) 6994 for(i=0; i<4; i++)
6994 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0; 6995 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6995 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); 6996 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6996 } else { 6997 } else {
6997 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); 6998 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6999 }
7000 }
7001
/**
 * Deblock one macroblock using the DSP-provided boundary-strength routine.
 * Hands off to filter_mb() when mb_x or mb_y is 0 or when
 * dsp.h264_loop_filter_strength is not set; asserts that the frame is not MBAFF.
 * Luma edges go through filter_mb_edgev/filter_mb_edgeh, and both chroma
 * planes (img_cb, img_cr) through filter_mb_edgecv/filter_mb_edgech.
 */
7002 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7003 MpegEncContext * const s = &h->s;
7004 int mb_xy, mb_type;
7005 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
7006
/* Fall back to the generic filter when mb_x or mb_y is 0 or no strength routine is installed. */
7007 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
7008 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
7009 return;
7010 }
7011 assert(!FRAME_MBAFF);
7012
7013 mb_xy = mb_x + mb_y*s->mb_stride;
7014 mb_type = s->current_picture.mb_type[mb_xy];
/* qp: this macroblock; qp0 / qp1: qscale of the macroblocks at mb_xy-1 and at h->top_mb_xy. */
7015 qp = s->current_picture.qscale_table[mb_xy];
7016 qp0 = s->current_picture.qscale_table[mb_xy-1];
7017 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
/* Chroma qp is derived from the raw (not yet averaged) luma qp values. */
7018 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
7019 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
7020 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
/* From here on qp0 / qp1 hold the rounded average of the neighbour's qp and the current qp. */
7021 qp0 = (qp + qp0 + 1) >> 1;
7022 qp1 = (qp + qp1 + 1) >> 1;
/* Return without filtering when all three luma qp values are at or below the threshold.
 * NOTE(review): presumably alpha is 0 in that range so filtering would be a no-op --
 * confirm against alpha_table. */
7023 qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7024 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh)
7025 return;
/* Same rounded averaging for the chroma qp values used on edge 0. */
7026 qpc0 = (qpc + qpc0 + 1) >> 1;
7027 qpc1 = (qpc + qpc1 + 1) >> 1;
7028
/* Intra macroblock: fixed strengths, 4 on edge 0 (with qp0/qp1) and 3 on the other edges (with qp). */
7029 if( IS_INTRA(mb_type) ) {
7030 int16_t bS4[4] = {4,4,4,4};
7031 int16_t bS3[4] = {3,3,3,3};
/* With the 8x8 transform only luma edges 0 and 2 are filtered; otherwise all four. */
7032 if( IS_8x8DCT(mb_type) ) {
7033 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7034 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7035 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7036 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7037 } else {
7038 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7039 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
7040 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7041 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7042 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7043 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7044 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7045 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
7046 }
/* Chroma: edges 0 and 2 of both planes, vertical edges first, then horizontal. */
7047 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7048 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7049 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7050 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7051 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7052 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7053 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7054 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
7055 return;
7056 } else {
/* bS[dir][edge][i]: dir 0 is used with the edgev filters below, dir 1 with the edgeh filters. */
7057 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
/* bSv views each edge's four int16_t strengths as one uint64_t, so a whole edge can be
 * tested or set with a single access.
 * NOTE(review): this is a type-punned access to an int16_t array -- confirm the build's
 * strict-aliasing assumptions. */
7058 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* edges is 1 only when mb_type has both MB_TYPE_16x16 and MB_TYPE_SKIP set, otherwise 4. */
7059 int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7060 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
/* mask_edge0 / mask_edge1 and step are only forwarded to h264_loop_filter_strength;
 * their exact meaning is defined by that routine (not visible here). */
7061 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7062 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7063 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7064 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7065 ? 3 : 0;
7066 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7067 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7068 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* Force strength 4 on all of edge 0 when the left (dir 0) / top (dir 1) neighbour is intra. */
7069 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7070 bSv[0][0] = 0x0004000400040004ULL;
7071 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7072 bSv[1][0] = 0x0004000400040004ULL;
7073
/* FILTER(hv,dir,edge): skip the edge when all four strengths are zero; otherwise filter
 * luma and, for even edges only, both chroma planes. Edge 0 uses qp<dir>/qpc<dir>
 * (qp0/qpc0 or qp1/qpc1); the other edges use qp/qpc. */
7074 #define FILTER(hv,dir,edge)\
7075 if(bSv[dir][edge]) {\
7076 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7077 if(!(edge&1)) {\
7078 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7079 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7080 }\
7081 }
/* Which edges are visited: only edge 0 when edges == 1, edges 0 and 2 with the 8x8
 * transform, all four otherwise; vertical edges are always filtered before horizontal ones. */
7082 if( edges == 1 ) {
7083 FILTER(v,0,0);
7084 FILTER(h,1,0);
7085 } else if( IS_8x8DCT(mb_type) ) {
7086 FILTER(v,0,0);
7087 FILTER(v,0,2);
7088 FILTER(h,1,0);
7089 FILTER(h,1,2);
7090 } else {
7091 FILTER(v,0,0);
7092 FILTER(v,0,1);
7093 FILTER(v,0,2);
7094 FILTER(v,0,3);
7095 FILTER(h,1,0);
7096 FILTER(h,1,1);
7097 FILTER(h,1,2);
7098 FILTER(h,1,3);
7099 }
7100 #undef FILTER
6998 } 7101 }
6999 } 7102 }
7000 7103
7001 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { 7104 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7002 MpegEncContext * const s = &h->s; 7105 MpegEncContext * const s = &h->s;
7033 /* First vertical edge is different in MBAFF frames 7136 /* First vertical edge is different in MBAFF frames
7034 * There are 8 different bS to compute and 2 different Qp 7137 * There are 8 different bS to compute and 2 different Qp
7035 */ 7138 */
7036 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride; 7139 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7037 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride }; 7140 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7038 int bS[8]; 7141 int16_t bS[8];
7039 int qp[2]; 7142 int qp[2];
7040 int chroma_qp[2]; 7143 int chroma_qp[2];
7041 int mb_qp, mbn0_qp, mbn1_qp; 7144 int mb_qp, mbn0_qp, mbn1_qp;
7042 int i; 7145 int i;
7043 first_vertical_edge_done = 1; 7146 first_vertical_edge_done = 1;
7112 unsigned int tmp_linesize = 2 * linesize; 7215 unsigned int tmp_linesize = 2 * linesize;
7113 unsigned int tmp_uvlinesize = 2 * uvlinesize; 7216 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7114 int mbn_xy = mb_xy - 2 * s->mb_stride; 7217 int mbn_xy = mb_xy - 2 * s->mb_stride;
7115 int qp, chroma_qp; 7218 int qp, chroma_qp;
7116 int i, j; 7219 int i, j;
7117 int bS[4]; 7220 int16_t bS[4];
7118 7221
7119 for(j=0; j<2; j++, mbn_xy += s->mb_stride){ 7222 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7120 if( IS_INTRA(mb_type) || 7223 if( IS_INTRA(mb_type) ||
7121 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) { 7224 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7122 bS[0] = bS[1] = bS[2] = bS[3] = 3; 7225 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7148 /* Calculate bS */ 7251 /* Calculate bS */
7149 for( edge = start; edge < edges; edge++ ) { 7252 for( edge = start; edge < edges; edge++ ) {
7150 /* mbn_xy: neighbor macroblock */ 7253 /* mbn_xy: neighbor macroblock */
7151 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; 7254 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7152 const int mbn_type = s->current_picture.mb_type[mbn_xy]; 7255 const int mbn_type = s->current_picture.mb_type[mbn_xy];
7153 int bS[4]; 7256 int16_t bS[4];
7154 int qp; 7257 int qp;
7155 7258
7156 if( (edge&1) && IS_8x8DCT(mb_type) ) 7259 if( (edge&1) && IS_8x8DCT(mb_type) )
7157 continue; 7260 continue;
7158 7261