comparison h264_loopfilter.c @ 10924:fb0307a3355e libavcodec

Call filter_mb_mbaff_edge*v() more often rather than doing extra calculations in the innermost loop. ~150 CPU cycles faster.
author michael
date Mon, 18 Jan 2010 21:22:09 +0000
parents 8945125b5ae6
children b847f02d5b03
comparison
equal deleted inserted replaced
10923:7a949922866e 10924:fb0307a3355e
136 } else { 136 } else {
137 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); 137 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
138 } 138 }
139 } 139 }
140 140
141 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) { 141 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) {
142 int i; 142 int i;
143 for( i = 0; i < 16; i++, pix += stride) { 143 for( i = 0; i < 8; i++, pix += stride) {
144 int index_a; 144 int index_a;
145 int alpha; 145 int alpha;
146 int beta; 146 int beta;
147 147 const int bS_index = (i >> 1) * bsi;
148 int qp_index;
149 int bS_index = (i >> 1);
150 if (!MB_FIELD) {
151 bS_index &= ~1;
152 bS_index |= (i & 1);
153 }
154 148
155 if( bS[bS_index] == 0 ) { 149 if( bS[bS_index] == 0 ) {
156 continue; 150 continue;
157 } 151 }
158 152
159 qp_index = MB_FIELD ? (i >> 3) : (i & 1); 153 index_a = qp + h->slice_alpha_c0_offset;
160 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
161 alpha = (alpha_table+52)[index_a]; 154 alpha = (alpha_table+52)[index_a];
162 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset]; 155 beta = (beta_table+52)[qp + h->slice_beta_offset];
163 156
164 if( bS[bS_index] < 4 ) { 157 if( bS[bS_index] < 4 ) {
165 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]]; 158 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
166 const int p0 = pix[-1]; 159 const int p0 = pix[-1];
167 const int p1 = pix[-2]; 160 const int p1 = pix[-2];
234 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); 227 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
235 } 228 }
236 } 229 }
237 } 230 }
238 } 231 }
239 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) { 232 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) {
240 int i; 233 int i;
241 for( i = 0; i < 8; i++, pix += stride) { 234 for( i = 0; i < 4; i++, pix += stride) {
242 int index_a; 235 int index_a;
243 int alpha; 236 int alpha;
244 int beta; 237 int beta;
245 238 const int bS_index = i*bsi;
246 int qp_index;
247 int bS_index = i;
248 239
249 if( bS[bS_index] == 0 ) { 240 if( bS[bS_index] == 0 ) {
250 continue; 241 continue;
251 } 242 }
252 243
253 qp_index = MB_FIELD ? (i >> 2) : (i & 1); 244 index_a = qp + h->slice_alpha_c0_offset;
254 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
255 alpha = (alpha_table+52)[index_a]; 245 alpha = (alpha_table+52)[index_a];
256 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset]; 246 beta = (beta_table+52)[qp + h->slice_beta_offset];
257 247
258 if( bS[bS_index] < 4 ) { 248 if( bS[bS_index] < 4 ) {
259 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1; 249 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
260 const int p0 = pix[-1]; 250 const int p0 = pix[-1];
261 const int p1 = pix[-2]; 251 const int p1 = pix[-2];
704 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1; 694 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
705 695
706 /* Filter edge */ 696 /* Filter edge */
707 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize); 697 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
708 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } 698 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
709 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp ); 699 if(MB_FIELD){
710 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp ); 700 filter_mb_mbaff_edgev ( h, img_y , linesize, bS , 1, qp [0] );
711 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp ); 701 filter_mb_mbaff_edgev ( h, img_y + 8* linesize, linesize, bS+4, 1, qp [1] );
702 filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0] );
703 filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] );
704 filter_mb_mbaff_edgecv( h, img_cr, uvlinesize, bS , 1, rqp[0] );
705 filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] );
706 }else{
707 filter_mb_mbaff_edgev ( h, img_y , 2* linesize, bS , 2, qp [0] );
708 filter_mb_mbaff_edgev ( h, img_y + linesize, 2* linesize, bS+1, 2, qp [1] );
709 filter_mb_mbaff_edgecv( h, img_cb, 2*uvlinesize, bS , 2, bqp[0] );
710 filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] );
711 filter_mb_mbaff_edgecv( h, img_cr, 2*uvlinesize, bS , 2, rqp[0] );
712 filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] );
713 }
712 } 714 }
713 715
714 #if CONFIG_SMALL 716 #if CONFIG_SMALL
715 for( dir = 0; dir < 2; dir++ ) 717 for( dir = 0; dir < 2; dir++ )
716 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir); 718 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);