libavcodec.hg: h264.c comparison

comparison h264.c @ 8376:e1b242224970 libavcodec

Use the new idct functions (except for chroma, as it was slower in benchmarks). cathedral: +0.5% speed; aladin: +0.6% speed [note: aladin has been cat-ed 10 times to reduce the influence of init time]. Speedup also verified via START/STOP_TIMER (the difference was very significant for the changed parts).

author	michael
date	Thu, 18 Dec 2008 02:53:18 +0000
parents	9000fd7c166e
children	63f491ce374b

comparison

Legend: equal, deleted, inserted, replaced

-:de2509cf3c44
+:e1b242224970
 if(!IS_INTRA4x4(mb_type)){
 if(is_h264){
 if(IS_INTRA16x16(mb_type)){
-if(transform_bypass && h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
+if(transform_bypass){
+if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
 }else{
 for(i=0; i<16; i++){
-if(h->non_zero_count_cache[ scan8[i] ])
+if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
 idct_add   (dest_y + block_offset[i], h->mb + i*16, linesize);
-else if(h->mb[i*16])
-idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
 }
+}
+}else{
+s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
 }
 }else if(h->cbp&15){
+if(transform_bypass){
 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
 for(i=0; i<16; i+=di){
 int nnz = h->non_zero_count_cache[ scan8[i] ];
 if(nnz){
-if(nnz==1 && h->mb[i*16])
-idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
-else
 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
+}
+}
+}else{
+if(IS_8x8DCT(mb_type)){
+s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
+}else{
+s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
 }
 }
 }
 }else{
 for(i=0; i<16; i++){

Mercurial > libavcodec.hg

comparison h264.c @ 8376:e1b242224970 libavcodec