comparison h264.c @ 7180:4acdafd254bf libavcodec

Clone decode_cabac_residual() for DC residuals, to get rid of a bunch of branches in the main loop.
author astrange
date Wed, 02 Jul 2008 22:08:12 +0000
parents 847e1fc5c2a1
children 069ef3bf1cef
comparison
equal deleted inserted replaced
7179:847e1fc5c2a1 7180:4acdafd254bf
5335 } 5335 }
5336 } 5336 }
5337 return get_cabac_bypass_sign( &h->cabac, -mvd ); 5337 return get_cabac_bypass_sign( &h->cabac, -mvd );
5338 } 5338 }
5339 5339
5340 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) { 5340 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5341 int nza, nzb; 5341 int nza, nzb;
5342 int ctx = 0; 5342 int ctx = 0;
5343 5343
5344 if( is_dc ) {
5344 if( cat == 0 ) { 5345 if( cat == 0 ) {
5345 nza = h->left_cbp&0x100; 5346 nza = h->left_cbp&0x100;
5346 nzb = h-> top_cbp&0x100; 5347 nzb = h-> top_cbp&0x100;
5347 } else if( cat == 1 || cat == 2 ) { 5348 } else {
5349 nza = (h->left_cbp>>(6+idx))&0x01;
5350 nzb = (h-> top_cbp>>(6+idx))&0x01;
5351 }
5352 } else {
5353 if( cat == 1 || cat == 2 ) {
5348 nza = h->non_zero_count_cache[scan8[idx] - 1]; 5354 nza = h->non_zero_count_cache[scan8[idx] - 1];
5349 nzb = h->non_zero_count_cache[scan8[idx] - 8]; 5355 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5350 } else if( cat == 3 ) {
5351 nza = (h->left_cbp>>(6+idx))&0x01;
5352 nzb = (h-> top_cbp>>(6+idx))&0x01;
5353 } else { 5356 } else {
5354 assert(cat == 4); 5357 assert(cat == 4);
5355 nza = h->non_zero_count_cache[scan8[16+idx] - 1]; 5358 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5356 nzb = h->non_zero_count_cache[scan8[16+idx] - 8]; 5359 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5360 }
5357 } 5361 }
5358 5362
5359 if( nza > 0 ) 5363 if( nza > 0 )
5360 ctx++; 5364 ctx++;
5361 5365
5370 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5374 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5371 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5375 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5372 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 5376 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5373 }; 5377 };
5374 5378
5375 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) { 5379 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5376 static const int significant_coeff_flag_offset[2][6] = { 5380 static const int significant_coeff_flag_offset[2][6] = {
5377 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 }, 5381 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5378 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 } 5382 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5379 }; 5383 };
5380 static const int last_coeff_flag_offset[2][6] = { 5384 static const int last_coeff_flag_offset[2][6] = {
5438 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx 5442 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5439 * 5-> Luma8x8 n = 4 * luma8x8idx 5443 * 5-> Luma8x8 n = 4 * luma8x8idx
5440 */ 5444 */
5441 5445
5442 /* read coded block flag */ 5446 /* read coded block flag */
5443 if( cat != 5 ) { 5447 if( is_dc || cat != 5 ) {
5444 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) { 5448 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5449 if( !is_dc ) {
5445 if( cat == 1 || cat == 2 ) 5450 if( cat == 1 || cat == 2 )
5446 h->non_zero_count_cache[scan8[n]] = 0; 5451 h->non_zero_count_cache[scan8[n]] = 0;
5447 else if( cat == 4 ) 5452 else
5448 h->non_zero_count_cache[scan8[16+n]] = 0; 5453 h->non_zero_count_cache[scan8[16+n]] = 0;
5454 }
5455
5449 #ifdef CABAC_ON_STACK 5456 #ifdef CABAC_ON_STACK
5450 h->cabac.range = cc.range ; 5457 h->cabac.range = cc.range ;
5451 h->cabac.low = cc.low ; 5458 h->cabac.low = cc.low ;
5452 h->cabac.bytestream= cc.bytestream; 5459 h->cabac.bytestream= cc.bytestream;
5453 #endif 5460 #endif
5460 last_coeff_ctx_base = h->cabac_state 5467 last_coeff_ctx_base = h->cabac_state
5461 + last_coeff_flag_offset[MB_FIELD][cat]; 5468 + last_coeff_flag_offset[MB_FIELD][cat];
5462 abs_level_m1_ctx_base = h->cabac_state 5469 abs_level_m1_ctx_base = h->cabac_state
5463 + coeff_abs_level_m1_offset[cat]; 5470 + coeff_abs_level_m1_offset[cat];
5464 5471
5465 if( cat == 5 ) { 5472 if( !is_dc && cat == 5 ) {
5466 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \ 5473 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5467 for(last= 0; last < coefs; last++) { \ 5474 for(last= 0; last < coefs; last++) { \
5468 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \ 5475 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5469 if( get_cabac( CC, sig_ctx )) { \ 5476 if( get_cabac( CC, sig_ctx )) { \
5470 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \ 5477 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5489 DECODE_SIGNIFICANCE( max_coeff - 1, last, last ); 5496 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5490 #endif 5497 #endif
5491 } 5498 }
5492 assert(coeff_count > 0); 5499 assert(coeff_count > 0);
5493 5500
5501 if( is_dc ) {
5494 if( cat == 0 ) 5502 if( cat == 0 )
5495 h->cbp_table[h->mb_xy] |= 0x100; 5503 h->cbp_table[h->mb_xy] |= 0x100;
5496 else if( cat == 1 || cat == 2 ) 5504 else
5505 h->cbp_table[h->mb_xy] |= 0x40 << n;
5506 } else {
5507 if( cat == 1 || cat == 2 )
5497 h->non_zero_count_cache[scan8[n]] = coeff_count; 5508 h->non_zero_count_cache[scan8[n]] = coeff_count;
5498 else if( cat == 3 )
5499 h->cbp_table[h->mb_xy] |= 0x40 << n;
5500 else if( cat == 4 ) 5509 else if( cat == 4 )
5501 h->non_zero_count_cache[scan8[16+n]] = coeff_count; 5510 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5502 else { 5511 else {
5503 assert( cat == 5 ); 5512 assert( cat == 5 );
5504 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1); 5513 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5505 } 5514 }
5515 }
5506 5516
5507 for( coeff_count--; coeff_count >= 0; coeff_count-- ) { 5517 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5508 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base; 5518 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5509 5519
5510 int j= scantable[index[coeff_count]]; 5520 int j= scantable[index[coeff_count]];
5511 5521
5512 if( get_cabac( CC, ctx ) == 0 ) { 5522 if( get_cabac( CC, ctx ) == 0 ) {
5513 node_ctx = coeff_abs_level_transition[0][node_ctx]; 5523 node_ctx = coeff_abs_level_transition[0][node_ctx];
5514 if( !qmul ) { 5524 if( is_dc ) {
5515 block[j] = get_cabac_bypass_sign( CC, -1); 5525 block[j] = get_cabac_bypass_sign( CC, -1);
5516 }else{ 5526 }else{
5517 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6; 5527 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5518 } 5528 }
5519 } else { 5529 } else {
5536 coeff_abs += coeff_abs + get_cabac_bypass( CC ); 5546 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5537 } 5547 }
5538 coeff_abs+= 14; 5548 coeff_abs+= 14;
5539 } 5549 }
5540 5550
5541 if( !qmul ) { 5551 if( is_dc ) {
5542 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs; 5552 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5543 else block[j] = coeff_abs; 5553 else block[j] = coeff_abs;
5544 }else{ 5554 }else{
5545 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6; 5555 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5546 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6; 5556 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
5551 h->cabac.range = cc.range ; 5561 h->cabac.range = cc.range ;
5552 h->cabac.low = cc.low ; 5562 h->cabac.low = cc.low ;
5553 h->cabac.bytestream= cc.bytestream; 5563 h->cabac.bytestream= cc.bytestream;
5554 #endif 5564 #endif
5555 5565
5566 }
5567
5568 #ifndef CONFIG_SMALL
5569 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5570 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5571 }
5572
5573 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5574 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5575 }
5576 #endif
5577
5578 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5579 #ifdef CONFIG_SMALL
5580 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5581 #else
5582 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5583 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5584 #endif
5556 } 5585 }
5557 5586
5558 static inline void compute_mb_neighbors(H264Context *h) 5587 static inline void compute_mb_neighbors(H264Context *h)
5559 { 5588 {
5560 MpegEncContext * const s = &h->s; 5589 MpegEncContext * const s = &h->s;