changeset 11349:33f8308382f5 libavcodec

Reorder the indexes in the weight tables (reference index first, then list index). 5 CPU cycles faster.
author michael
date Wed, 03 Mar 2010 21:10:08 +0000
parents 3c624d573966
children 7d9a1a807e91
files h264.c h264.h h264_refs.c
diffstat 3 files changed, 27 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/h264.c	Wed Mar 03 20:36:56 2010 +0000
+++ b/h264.c	Wed Mar 03 21:10:08 2010 +0000
@@ -495,14 +495,14 @@
             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
         }else{
             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
-                            h->luma_weight[0][refn0][0], h->luma_weight[1][refn1][0],
-                            h->luma_weight[0][refn0][1] + h->luma_weight[1][refn1][1]);
+                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
+                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
-                            h->chroma_weight[0][refn0][0][0], h->chroma_weight[1][refn1][0][0],
-                            h->chroma_weight[0][refn0][0][1] + h->chroma_weight[1][refn1][0][1]);
+                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
+                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
-                            h->chroma_weight[0][refn0][1][0], h->chroma_weight[1][refn1][1][0],
-                            h->chroma_weight[0][refn0][1][1] + h->chroma_weight[1][refn1][1][1]);
+                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
+                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
         }
     }else{
         int list = list1 ? 1 : 0;
@@ -513,12 +513,12 @@
                     qpix_put, chroma_put);
 
         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
-                       h->luma_weight[list][refn][0], h->luma_weight[list][refn][1]);
+                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
         if(h->use_weight_chroma){
             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
-                             h->chroma_weight[list][refn][0][0], h->chroma_weight[list][refn][0][1]);
+                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
-                             h->chroma_weight[list][refn][1][0], h->chroma_weight[list][refn][1][1]);
+                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
         }
     }
 }
@@ -1368,16 +1368,16 @@
 
             luma_weight_flag= get_bits1(&s->gb);
             if(luma_weight_flag){
-                h->luma_weight[list][i][0]= get_se_golomb(&s->gb);
-                h->luma_weight[list][i][1]= get_se_golomb(&s->gb);
-                if(   h->luma_weight[list][i][0] != luma_def
-                   || h->luma_weight[list][i][1] != 0) {
+                h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
+                h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
+                if(   h->luma_weight[i][list][0] != luma_def
+                   || h->luma_weight[i][list][1] != 0) {
                     h->use_weight= 1;
                     h->luma_weight_flag[list]= 1;
                 }
             }else{
-                h->luma_weight[list][i][0]= luma_def;
-                h->luma_weight[list][i][1]= 0;
+                h->luma_weight[i][list][0]= luma_def;
+                h->luma_weight[i][list][1]= 0;
             }
 
             if(CHROMA){
@@ -1385,10 +1385,10 @@
                 if(chroma_weight_flag){
                     int j;
                     for(j=0; j<2; j++){
-                        h->chroma_weight[list][i][j][0]= get_se_golomb(&s->gb);
-                        h->chroma_weight[list][i][j][1]= get_se_golomb(&s->gb);
-                        if(   h->chroma_weight[list][i][j][0] != chroma_def
-                           || h->chroma_weight[list][i][j][1] != 0) {
+                        h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
+                        h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
+                        if(   h->chroma_weight[i][list][j][0] != chroma_def
+                           || h->chroma_weight[i][list][j][1] != 0) {
                             h->use_weight_chroma= 1;
                             h->chroma_weight_flag[list]= 1;
                         }
@@ -1396,8 +1396,8 @@
                 }else{
                     int j;
                     for(j=0; j<2; j++){
-                        h->chroma_weight[list][i][j][0]= chroma_def;
-                        h->chroma_weight[list][i][j][1]= 0;
+                        h->chroma_weight[i][list][j][0]= chroma_def;
+                        h->chroma_weight[i][list][j][1]= 0;
                     }
                 }
             }
--- a/h264.h	Wed Mar 03 20:36:56 2010 +0000
+++ b/h264.h	Wed Mar 03 21:10:08 2010 +0000
@@ -374,8 +374,8 @@
     int luma_log2_weight_denom;
     int chroma_log2_weight_denom;
     //The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss
-    int luma_weight[2][48][2];
-    int chroma_weight[2][48][2][2];
+    int luma_weight[48][2][2];
+    int chroma_weight[48][2][2][2];
     int implicit_weight[48][48];
 
     int direct_spatial_mv_pred;
--- a/h264_refs.c	Wed Mar 03 20:36:56 2010 +0000
+++ b/h264_refs.c	Wed Mar 03 21:10:08 2010 +0000
@@ -315,11 +315,11 @@
             field[1].reference = PICT_BOTTOM_FIELD;
             field[1].poc= field[1].field_poc[1];
 
-            h->luma_weight[list][16+2*i][0] = h->luma_weight[list][16+2*i+1][0] = h->luma_weight[list][i][0];
-            h->luma_weight[list][16+2*i][1] = h->luma_weight[list][16+2*i+1][1] = h->luma_weight[list][i][1];
+            h->luma_weight[16+2*i][list][0] = h->luma_weight[16+2*i+1][list][0] = h->luma_weight[i][list][0];
+            h->luma_weight[16+2*i][list][1] = h->luma_weight[16+2*i+1][list][1] = h->luma_weight[i][list][1];
             for(j=0; j<2; j++){
-                h->chroma_weight[list][16+2*i][j][0] = h->chroma_weight[list][16+2*i+1][j][0] = h->chroma_weight[list][i][j][0];
-                h->chroma_weight[list][16+2*i][j][1] = h->chroma_weight[list][16+2*i+1][j][1] = h->chroma_weight[list][i][j][1];
+                h->chroma_weight[16+2*i][list][j][0] = h->chroma_weight[16+2*i+1][list][j][0] = h->chroma_weight[i][list][j][0];
+                h->chroma_weight[16+2*i][list][j][1] = h->chroma_weight[16+2*i+1][list][j][1] = h->chroma_weight[i][list][j][1];
             }
         }
     }