comparison h264.c @ 2897:a5360f2a673e libavcodec

13% faster decode_residual (cavlc). patch by diane_cartman at gmx dot de.
author lorenm
date Tue, 27 Sep 2005 04:46:45 +0000
parents 4c6eb826e9cb
children 95f469274a1d
comparison
equal deleted inserted replaced
2896:e1dfc65af0fb 2897:a5360f2a673e
4435 * @return <0 if an error occurred 4435 * @return <0 if an error occurred
4436 */ 4436 */
4437 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){ 4437 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){
4438 MpegEncContext * const s = &h->s; 4438 MpegEncContext * const s = &h->s;
4439 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}; 4439 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4440 int level[16], run[16]; 4440 int level[16];
4441 int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones; 4441 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4442 4442
4443 //FIXME put trailing_onex into the context 4443 //FIXME put trailing_onex into the context
4444 4444
4445 if(n == CHROMA_DC_BLOCK_INDEX){ 4445 if(n == CHROMA_DC_BLOCK_INDEX){
4446 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); 4446 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4469 4469
4470 for(i=0; i<trailing_ones; i++){ 4470 for(i=0; i<trailing_ones; i++){
4471 level[i]= 1 - 2*get_bits1(gb); 4471 level[i]= 1 - 2*get_bits1(gb);
4472 } 4472 }
4473 4473
4474 suffix_length= total_coeff > 10 && trailing_ones < 3; 4474 if(i<total_coeff) {
4475
4476 for(; i<total_coeff; i++){
4477 const int prefix= get_level_prefix(gb);
4478 int level_code, mask; 4475 int level_code, mask;
4479 4476 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4477 int prefix= get_level_prefix(gb);
4478
4479 //first coefficient has suffix_length equal to 0 or 1
4480 if(prefix<14){ //FIXME try to build a large unified VLC table for all this 4480 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4481 if(suffix_length) 4481 if(suffix_length)
4482 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part 4482 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4483 else 4483 else
4484 level_code= (prefix<<suffix_length); //part 4484 level_code= (prefix<<suffix_length); //part
4493 }else{ 4493 }else{
4494 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y); 4494 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4495 return -1; 4495 return -1;
4496 } 4496 }
4497 4497
4498 if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration 4498 if(trailing_ones < 3) level_code += 2;
4499 4499
4500 suffix_length = 1;
4501 if(level_code > 5)
4502 suffix_length++;
4500 mask= -(level_code&1); 4503 mask= -(level_code&1);
4501 level[i]= (((2+level_code)>>1) ^ mask) - mask; 4504 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4502 4505 i++;
4503 if(suffix_length==0) suffix_length=1; //FIXME split first iteration 4506
4504 4507 //remaining coefficients have suffix_length > 0
4505 #if 1 4508 for(;i<total_coeff;i++) {
4506 if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++; 4509 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4507 #else 4510 prefix = get_level_prefix(gb);
4508 if((2+level_code)>>1) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++; 4511 if(prefix<15){
4509 /* ? == prefix > 2 or sth */ 4512 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4510 #endif 4513 }else if(prefix==15){
4511 tprintf("level: %d suffix_length:%d\n", level[i], suffix_length); 4514 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4515 }else{
4516 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4517 return -1;
4518 }
4519 mask= -(level_code&1);
4520 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4521 if(level_code > suffix_limit[suffix_length])
4522 suffix_length++;
4523 }
4512 } 4524 }
4513 4525
4514 if(total_coeff == max_coeff) 4526 if(total_coeff == max_coeff)
4515 zeros_left=0; 4527 zeros_left=0;
4516 else{ 4528 else{
4517 if(n == CHROMA_DC_BLOCK_INDEX) 4529 if(n == CHROMA_DC_BLOCK_INDEX)
4518 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); 4530 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4519 else 4531 else
4520 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1); 4532 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4521 } 4533 }
4522 4534
4523 for(i=0; i<total_coeff-1; i++){ 4535 coeff_num = zeros_left + total_coeff - 1;
4524 if(zeros_left <=0) 4536 j = scantable[coeff_num];
4525 break; 4537 if(n > 24){
4526 else if(zeros_left < 7){ 4538 block[j] = level[0];
4527 run[i]= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1); 4539 for(i=1;i<total_coeff;i++) {
4528 }else{ 4540 if(zeros_left <= 0)
4529 run[i]= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); 4541 run_before = 0;
4530 } 4542 else if(zeros_left < 7){
4531 zeros_left -= run[i]; 4543 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4544 }else{
4545 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4546 }
4547 zeros_left -= run_before;
4548 coeff_num -= 1 + run_before;
4549 j= scantable[ coeff_num ];
4550
4551 block[j]= level[i];
4552 }
4553 }else{
4554 block[j] = level[0] * qmul[j];
4555 for(i=1;i<total_coeff;i++) {
4556 if(zeros_left <= 0)
4557 run_before = 0;
4558 else if(zeros_left < 7){
4559 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4560 }else{
4561 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4562 }
4563 zeros_left -= run_before;
4564 coeff_num -= 1 + run_before;
4565 j= scantable[ coeff_num ];
4566
4567 block[j]= level[i] * qmul[j];
4568 // printf("%d %d ", block[j], qmul[j]);
4569 }
4532 } 4570 }
4533 4571
4534 if(zeros_left<0){ 4572 if(zeros_left<0){
4535 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y); 4573 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4536 return -1; 4574 return -1;
4537 } 4575 }
4538 4576
4539 for(; i<total_coeff-1; i++){
4540 run[i]= 0;
4541 }
4542
4543 run[i]= zeros_left;
4544
4545 coeff_num=-1;
4546 if(n > 24){
4547 for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
4548 int j;
4549
4550 coeff_num += run[i] + 1; //FIXME add 1 earlier ?
4551 j= scantable[ coeff_num ];
4552
4553 block[j]= level[i];
4554 }
4555 }else{
4556 for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
4557 int j;
4558
4559 coeff_num += run[i] + 1; //FIXME add 1 earlier ?
4560 j= scantable[ coeff_num ];
4561
4562 block[j]= level[i] * qmul[j];
4563 // printf("%d %d ", block[j], qmul[j]);
4564 }
4565 }
4566 return 0; 4577 return 0;
4567 } 4578 }
4568 4579
4569 /** 4580 /**
4570 * decodes a P_SKIP or B_SKIP macroblock 4581 * decodes a P_SKIP or B_SKIP macroblock