diff h264.h @ 10913:497929e9d912 libavcodec

Perform reference remapping at fill_cache() time instead of in the loop filter. This removes one obstacle of getting ff_h264_filter_mb_fast() bitexact. code is maybe 0.1% faster
author michael
date Mon, 18 Jan 2010 05:15:31 +0000
parents 9bdf0bb607a3
children edd918a4c0b8
line wrap: on
line diff
--- a/h264.h	Mon Jan 18 00:34:28 2010 +0000
+++ b/h264.h	Mon Jan 18 05:15:31 2010 +0000
@@ -834,11 +834,20 @@
                 }
 
                 ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
+                if(for_deblock){
+                    int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
+                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
+                    ref += h->b8_stride;
+                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
+                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
+                }else{
                 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                 ref += h->b8_stride;
                 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
+                }
 
                 b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
                 for(y=0; y<4; y++){
@@ -1029,16 +1038,24 @@
                 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
+                if(for_deblock){
+                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+                    h->ref_cache[list][scan8[0] + 0 - 1*8]=
+                    h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
+                    h->ref_cache[list][scan8[0] + 2 - 1*8]=
+                    h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
+                }else{
                 h->ref_cache[list][scan8[0] + 0 - 1*8]=
                 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                 h->ref_cache[list][scan8[0] + 2 - 1*8]=
                 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
+                }
             }else{
                 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
-                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
+                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= (((for_deblock||top_type) ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
             }
 
             for(i=0; i<2; i++){
@@ -1048,13 +1065,19 @@
                     const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                     *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                     *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
+                    if(for_deblock){
+                        int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[i]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+                        h->ref_cache[list][cache_idx  ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]];
+                        h->ref_cache[list][cache_idx+8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]];
+                    }else{
                     h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                     h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
+                    }
                 }else{
                     *(uint32_t*)h->mv_cache [list][cache_idx  ]=
                     *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                     h->ref_cache[list][cache_idx  ]=
-                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+                    h->ref_cache[list][cache_idx+8]= (for_deblock||left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                 }
             }