Mercurial > libavcodec.hg
comparison h264.c @ 8376:e1b242224970 libavcodec
Use the new IDCT functions (except for chroma, where they were slower in benchmarks)
cathedral: +0.5% speed
aladin: +0.6% speed [note: the aladin sample was concatenated 10 times to reduce the influence
of init time]
The speedup was also verified via START/STOP_TIMER (the difference was very significant
for the changed parts)
author | michael |
---|---|
date | Thu, 18 Dec 2008 02:53:18 +0000 |
parents | 9000fd7c166e |
children | 63f491ce374b |
comparison
equal
deleted
inserted
replaced
8375:de2509cf3c44 | 8376:e1b242224970 |
---|---|
2513 | 2513 |
2514 | 2514 |
2515 if(!IS_INTRA4x4(mb_type)){ | 2515 if(!IS_INTRA4x4(mb_type)){ |
2516 if(is_h264){ | 2516 if(is_h264){ |
2517 if(IS_INTRA16x16(mb_type)){ | 2517 if(IS_INTRA16x16(mb_type)){ |
2518 if(transform_bypass && h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){ | 2518 if(transform_bypass){ |
2519 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){ | |
2519 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); | 2520 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); |
2520 }else{ | 2521 }else{ |
2521 for(i=0; i<16; i++){ | 2522 for(i=0; i<16; i++){ |
2522 if(h->non_zero_count_cache[ scan8[i] ]) | 2523 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) |
2523 idct_add (dest_y + block_offset[i], h->mb + i*16, linesize); | 2524 idct_add (dest_y + block_offset[i], h->mb + i*16, linesize); |
2524 else if(h->mb[i*16]) | |
2525 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize); | |
2526 } | 2525 } |
2526 } | |
2527 }else{ | |
2528 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); | |
2527 } | 2529 } |
2528 }else if(h->cbp&15){ | 2530 }else if(h->cbp&15){ |
2531 if(transform_bypass){ | |
2529 const int di = IS_8x8DCT(mb_type) ? 4 : 1; | 2532 const int di = IS_8x8DCT(mb_type) ? 4 : 1; |
2530 for(i=0; i<16; i+=di){ | 2533 for(i=0; i<16; i+=di){ |
2531 int nnz = h->non_zero_count_cache[ scan8[i] ]; | 2534 int nnz = h->non_zero_count_cache[ scan8[i] ]; |
2532 if(nnz){ | 2535 if(nnz){ |
2533 if(nnz==1 && h->mb[i*16]) | |
2534 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize); | |
2535 else | |
2536 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); | 2536 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); |
2537 } | |
2538 } | |
2539 }else{ | |
2540 if(IS_8x8DCT(mb_type)){ | |
2541 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); | |
2542 }else{ | |
2543 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); | |
2537 } | 2544 } |
2538 } | 2545 } |
2539 } | 2546 } |
2540 }else{ | 2547 }else{ |
2541 for(i=0; i<16; i++){ | 2548 for(i=0; i<16; i++){ |