Mercurial > libavcodec.hg
comparison h264.c @ 3645:47821be55b6c libavcodec
mmx implementation of deblocking strength decision.
2-3% faster h264.
author | lorenm |
---|---|
date | Mon, 28 Aug 2006 09:33:01 +0000 |
parents | 5ef29058581e |
children | 2265410d1d74 |
comparison
equal
deleted
inserted
replaced
3644:9eb3d84b6eef | 3645:47821be55b6c |
---|---|
407 static VLC run7_vlc; | 407 static VLC run7_vlc; |
408 | 408 |
409 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); | 409 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); |
410 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); | 410 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); |
411 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); | 411 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); |
412 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); | |
412 | 413 |
413 static always_inline uint32_t pack16to32(int a, int b){ | 414 static always_inline uint32_t pack16to32(int a, int b){ |
414 #ifdef WORDS_BIGENDIAN | 415 #ifdef WORDS_BIGENDIAN |
415 return (b&0xFFFF) + (a<<16); | 416 return (b&0xFFFF) + (a<<16); |
416 #else | 417 #else |
3877 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize); | 3878 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize); |
3878 } else { | 3879 } else { |
3879 tprintf("call filter_mb\n"); | 3880 tprintf("call filter_mb\n"); |
3880 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize); | 3881 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize); |
3881 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb | 3882 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb |
3882 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); | 3883 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); |
3883 } | 3884 } |
3884 } | 3885 } |
3885 } | 3886 } |
3886 | 3887 |
3887 /** | 3888 /** |
6692 | 6693 |
6693 return 0; | 6694 return 0; |
6694 } | 6695 } |
6695 | 6696 |
6696 | 6697 |
6697 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { | 6698 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { |
6698 int i, d; | 6699 int i, d; |
6699 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); | 6700 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); |
6700 const int alpha = alpha_table[index_a]; | 6701 const int alpha = alpha_table[index_a]; |
6701 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; | 6702 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; |
6702 | 6703 |
6753 } | 6754 } |
6754 pix += stride; | 6755 pix += stride; |
6755 } | 6756 } |
6756 } | 6757 } |
6757 } | 6758 } |
6758 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { | 6759 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { |
6759 int i; | 6760 int i; |
6760 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); | 6761 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); |
6761 const int alpha = alpha_table[index_a]; | 6762 const int alpha = alpha_table[index_a]; |
6762 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; | 6763 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; |
6763 | 6764 |
6769 } else { | 6770 } else { |
6770 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); | 6771 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); |
6771 } | 6772 } |
6772 } | 6773 } |
6773 | 6774 |
6774 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) { | 6775 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) { |
6775 int i; | 6776 int i; |
6776 for( i = 0; i < 16; i++, pix += stride) { | 6777 for( i = 0; i < 16; i++, pix += stride) { |
6777 int index_a; | 6778 int index_a; |
6778 int alpha; | 6779 int alpha; |
6779 int beta; | 6780 int beta; |
6867 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); | 6868 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); |
6868 } | 6869 } |
6869 } | 6870 } |
6870 } | 6871 } |
6871 } | 6872 } |
6872 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) { | 6873 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) { |
6873 int i; | 6874 int i; |
6874 for( i = 0; i < 8; i++, pix += stride) { | 6875 for( i = 0; i < 8; i++, pix += stride) { |
6875 int index_a; | 6876 int index_a; |
6876 int alpha; | 6877 int alpha; |
6877 int beta; | 6878 int beta; |
6920 } | 6921 } |
6921 } | 6922 } |
6922 } | 6923 } |
6923 } | 6924 } |
6924 | 6925 |
6925 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { | 6926 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { |
6926 int i, d; | 6927 int i, d; |
6927 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); | 6928 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); |
6928 const int alpha = alpha_table[index_a]; | 6929 const int alpha = alpha_table[index_a]; |
6929 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; | 6930 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; |
6930 const int pix_next = stride; | 6931 const int pix_next = stride; |
6980 pix++; | 6981 pix++; |
6981 } | 6982 } |
6982 } | 6983 } |
6983 } | 6984 } |
6984 | 6985 |
6985 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { | 6986 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { |
6986 int i; | 6987 int i; |
6987 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); | 6988 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); |
6988 const int alpha = alpha_table[index_a]; | 6989 const int alpha = alpha_table[index_a]; |
6989 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; | 6990 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; |
6990 | 6991 |
6993 for(i=0; i<4; i++) | 6994 for(i=0; i<4; i++) |
6994 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0; | 6995 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0; |
6995 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); | 6996 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); |
6996 } else { | 6997 } else { |
6997 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); | 6998 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); |
6999 } | |
7000 } | |
7001 | |
7002 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { | |
7003 MpegEncContext * const s = &h->s; | |
7004 int mb_xy, mb_type; | |
7005 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; | |
7006 | |
7007 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) { | |
7008 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); | |
7009 return; | |
7010 } | |
7011 assert(!FRAME_MBAFF); | |
7012 | |
7013 mb_xy = mb_x + mb_y*s->mb_stride; | |
7014 mb_type = s->current_picture.mb_type[mb_xy]; | |
7015 qp = s->current_picture.qscale_table[mb_xy]; | |
7016 qp0 = s->current_picture.qscale_table[mb_xy-1]; | |
7017 qp1 = s->current_picture.qscale_table[h->top_mb_xy]; | |
7018 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp ); | |
7019 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 ); | |
7020 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 ); | |
7021 qp0 = (qp + qp0 + 1) >> 1; | |
7022 qp1 = (qp + qp1 + 1) >> 1; | |
7023 qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset); | |
7024 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh) | |
7025 return; | |
7026 qpc0 = (qpc + qpc0 + 1) >> 1; | |
7027 qpc1 = (qpc + qpc1 + 1) >> 1; | |
7028 | |
7029 if( IS_INTRA(mb_type) ) { | |
7030 int16_t bS4[4] = {4,4,4,4}; | |
7031 int16_t bS3[4] = {3,3,3,3}; | |
7032 if( IS_8x8DCT(mb_type) ) { | |
7033 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 ); | |
7034 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp ); | |
7035 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 ); | |
7036 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp ); | |
7037 } else { | |
7038 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 ); | |
7039 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp ); | |
7040 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp ); | |
7041 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp ); | |
7042 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 ); | |
7043 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp ); | |
7044 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp ); | |
7045 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp ); | |
7046 } | |
7047 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 ); | |
7048 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc ); | |
7049 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 ); | |
7050 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc ); | |
7051 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 ); | |
7052 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc ); | |
7053 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 ); | |
7054 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc ); | |
7055 return; | |
7056 } else { | |
7057 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]); | |
7058 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS; | |
7059 int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP)) | |
7060 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4; | |
7061 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : | |
7062 (mb_type & MB_TYPE_16x8) ? 1 : 0; | |
7063 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) | |
7064 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16)) | |
7065 ? 3 : 0; | |
7066 int step = IS_8x8DCT(mb_type) ? 2 : 1; | |
7067 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, | |
7068 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 ); | |
7069 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) ) | |
7070 bSv[0][0] = 0x0004000400040004ULL; | |
7071 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) ) | |
7072 bSv[1][0] = 0x0004000400040004ULL; | |
7073 | |
7074 #define FILTER(hv,dir,edge)\ | |
7075 if(bSv[dir][edge]) {\ | |
7076 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\ | |
7077 if(!(edge&1)) {\ | |
7078 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\ | |
7079 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\ | |
7080 }\ | |
7081 } | |
7082 if( edges == 1 ) { | |
7083 FILTER(v,0,0); | |
7084 FILTER(h,1,0); | |
7085 } else if( IS_8x8DCT(mb_type) ) { | |
7086 FILTER(v,0,0); | |
7087 FILTER(v,0,2); | |
7088 FILTER(h,1,0); | |
7089 FILTER(h,1,2); | |
7090 } else { | |
7091 FILTER(v,0,0); | |
7092 FILTER(v,0,1); | |
7093 FILTER(v,0,2); | |
7094 FILTER(v,0,3); | |
7095 FILTER(h,1,0); | |
7096 FILTER(h,1,1); | |
7097 FILTER(h,1,2); | |
7098 FILTER(h,1,3); | |
7099 } | |
7100 #undef FILTER | |
6998 } | 7101 } |
6999 } | 7102 } |
7000 | 7103 |
7001 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { | 7104 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { |
7002 MpegEncContext * const s = &h->s; | 7105 MpegEncContext * const s = &h->s; |
7033 /* First vertical edge is different in MBAFF frames | 7136 /* First vertical edge is different in MBAFF frames |
7034 * There are 8 different bS to compute and 2 different Qp | 7137 * There are 8 different bS to compute and 2 different Qp |
7035 */ | 7138 */ |
7036 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride; | 7139 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride; |
7037 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride }; | 7140 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride }; |
7038 int bS[8]; | 7141 int16_t bS[8]; |
7039 int qp[2]; | 7142 int qp[2]; |
7040 int chroma_qp[2]; | 7143 int chroma_qp[2]; |
7041 int mb_qp, mbn0_qp, mbn1_qp; | 7144 int mb_qp, mbn0_qp, mbn1_qp; |
7042 int i; | 7145 int i; |
7043 first_vertical_edge_done = 1; | 7146 first_vertical_edge_done = 1; |
7112 unsigned int tmp_linesize = 2 * linesize; | 7215 unsigned int tmp_linesize = 2 * linesize; |
7113 unsigned int tmp_uvlinesize = 2 * uvlinesize; | 7216 unsigned int tmp_uvlinesize = 2 * uvlinesize; |
7114 int mbn_xy = mb_xy - 2 * s->mb_stride; | 7217 int mbn_xy = mb_xy - 2 * s->mb_stride; |
7115 int qp, chroma_qp; | 7218 int qp, chroma_qp; |
7116 int i, j; | 7219 int i, j; |
7117 int bS[4]; | 7220 int16_t bS[4]; |
7118 | 7221 |
7119 for(j=0; j<2; j++, mbn_xy += s->mb_stride){ | 7222 for(j=0; j<2; j++, mbn_xy += s->mb_stride){ |
7120 if( IS_INTRA(mb_type) || | 7223 if( IS_INTRA(mb_type) || |
7121 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) { | 7224 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) { |
7122 bS[0] = bS[1] = bS[2] = bS[3] = 3; | 7225 bS[0] = bS[1] = bS[2] = bS[3] = 3; |
7148 /* Calculate bS */ | 7251 /* Calculate bS */ |
7149 for( edge = start; edge < edges; edge++ ) { | 7252 for( edge = start; edge < edges; edge++ ) { |
7150 /* mbn_xy: neighbor macroblock */ | 7253 /* mbn_xy: neighbor macroblock */ |
7151 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; | 7254 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; |
7152 const int mbn_type = s->current_picture.mb_type[mbn_xy]; | 7255 const int mbn_type = s->current_picture.mb_type[mbn_xy]; |
7153 int bS[4]; | 7256 int16_t bS[4]; |
7154 int qp; | 7257 int qp; |
7155 | 7258 |
7156 if( (edge&1) && IS_8x8DCT(mb_type) ) | 7259 if( (edge&1) && IS_8x8DCT(mb_type) ) |
7157 continue; | 7260 continue; |
7158 | 7261 |