changeset 11034:fd5921186064 libavcodec

Make the fast loop filter path work with unavailable left MBs. This prevents the issue with having to switch between slow and fast code paths in each row. 0.5% faster loopfilter for cathedral
author michael
date Thu, 28 Jan 2010 02:15:25 +0000
parents b5577677b97d
children 4debec8a15fa
files h264_loopfilter.c
diffstat 1 files changed, 12 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/h264_loopfilter.c	Thu Jan 28 01:31:06 2010 +0000
+++ b/h264_loopfilter.c	Thu Jan 28 02:15:25 2010 +0000
@@ -308,16 +308,17 @@
 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
     MpegEncContext * const s = &h->s;
     int mb_xy;
-    int mb_type;
+    int mb_type, left_type;
     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
 
     mb_xy = h->mb_xy;
 
-    if(!h->top_type || !h->left_type[0] || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
+    if(!h->top_type || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
         ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
         return;
     }
     assert(!FRAME_MBAFF);
+    left_type= h->left_type[0];
 
     mb_type = s->current_picture.mb_type[mb_xy];
     qp = s->current_picture.qscale_table[mb_xy];
@@ -340,11 +341,13 @@
         int16_t bS3[4] = {3,3,3,3};
         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
         if( IS_8x8DCT(mb_type) ) {
+            if(left_type)
             filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h);
             filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
             filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
             filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
         } else {
+            if(left_type)
             filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h);
             filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h);
             filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
@@ -354,9 +357,11 @@
             filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
             filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h);
         }
-        filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
+        if(left_type){
+            filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
+            filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
+        }
         filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
-        filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
         filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
         filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
         filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
@@ -395,14 +400,17 @@
             }\
         }
         if( edges == 1 ) {
+            if(left_type)
             FILTER(v,0,0);
             FILTER(h,1,0);
         } else if( IS_8x8DCT(mb_type) ) {
+            if(left_type)
             FILTER(v,0,0);
             FILTER(v,0,2);
             FILTER(h,1,0);
             FILTER(h,1,2);
         } else {
+            if(left_type)
             FILTER(v,0,0);
             FILTER(v,0,1);
             FILTER(v,0,2);