comparison h264.c @ 8376:e1b242224970 libavcodec

Use the new idct functions (except for chroma, as it was slower in benchmarks). cathedral: +0.5% speed; aladin: +0.6% speed [note: aladin has been cat-ed 10 times to reduce the influence of init time]. Speedup also verified via START/STOP_TIMER (the difference was very significant for the changed parts).
author michael
date Thu, 18 Dec 2008 02:53:18 +0000
parents 9000fd7c166e
children 63f491ce374b
comparison
equal deleted inserted replaced
8375:de2509cf3c44 8376:e1b242224970
2513 2513
2514 2514
2515 if(!IS_INTRA4x4(mb_type)){ 2515 if(!IS_INTRA4x4(mb_type)){
2516 if(is_h264){ 2516 if(is_h264){
2517 if(IS_INTRA16x16(mb_type)){ 2517 if(IS_INTRA16x16(mb_type)){
2518 if(transform_bypass && h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){ 2518 if(transform_bypass){
2519 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2519 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); 2520 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2520 }else{ 2521 }else{
2521 for(i=0; i<16; i++){ 2522 for(i=0; i<16; i++){
2522 if(h->non_zero_count_cache[ scan8[i] ]) 2523 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2523 idct_add (dest_y + block_offset[i], h->mb + i*16, linesize); 2524 idct_add (dest_y + block_offset[i], h->mb + i*16, linesize);
2524 else if(h->mb[i*16])
2525 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2526 } 2525 }
2526 }
2527 }else{
2528 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2527 } 2529 }
2528 }else if(h->cbp&15){ 2530 }else if(h->cbp&15){
2531 if(transform_bypass){
2529 const int di = IS_8x8DCT(mb_type) ? 4 : 1; 2532 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2530 for(i=0; i<16; i+=di){ 2533 for(i=0; i<16; i+=di){
2531 int nnz = h->non_zero_count_cache[ scan8[i] ]; 2534 int nnz = h->non_zero_count_cache[ scan8[i] ];
2532 if(nnz){ 2535 if(nnz){
2533 if(nnz==1 && h->mb[i*16])
2534 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2535 else
2536 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); 2536 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2537 }
2538 }
2539 }else{
2540 if(IS_8x8DCT(mb_type)){
2541 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2542 }else{
2543 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2537 } 2544 }
2538 } 2545 }
2539 } 2546 }
2540 }else{ 2547 }else{
2541 for(i=0; i<16; i++){ 2548 for(i=0; i<16; i++){