comparison h264.c @ 4040:9eaea06c5ba6 libavcodec

Optimize sign decoding code in decode_residual(). x86 is 4% faster on P3; C sign stuff + x86 code for everything else is also faster than before (sorry, forgot to test pure C) ... and if I replace the second occurrence of the sign decoding in decode_residual by the asm too, then everything gets slower. I am starting to think that it might be best to write the whole function in asm; playing this "avoid random deoptimizations" game with gcc is not fun at all.
author michael
date Thu, 19 Oct 2006 01:19:03 +0000
parents 16697bdf4ac2
children 0113ceb0f953
comparison
equal deleted inserted replaced
4039:866a83726985 4040:9eaea06c5ba6
6166 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base; 6166 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6167 int j= scantable[index[i]]; 6167 int j= scantable[index[i]];
6168 6168
6169 if( get_cabac( CC, ctx ) == 0 ) { 6169 if( get_cabac( CC, ctx ) == 0 ) {
6170 if( !qmul ) { 6170 if( !qmul ) {
6171 if( get_cabac_bypass( CC ) ) block[j] = -1; 6171 block[j] = get_cabac_bypass_sign( CC, -1);
6172 else block[j] = 1;
6173 }else{ 6172 }else{
6174 if( get_cabac_bypass( CC ) ) block[j] = (-qmul[j] + 32) >> 6; 6173 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
6175 else block[j] = ( qmul[j] + 32) >> 6;
6176 } 6174 }
6177 6175
6178 abslevel1++; 6176 abslevel1++;
6179 } else { 6177 } else {
6180 int coeff_abs = 2; 6178 int coeff_abs = 2;