changeset 11286:db94c9bc5694 libavcodec

Reorder intra4x4_pred_mode so that we can read/write 4 values at once. This is 3-7 CPU cycles faster.
author michael
date Thu, 25 Feb 2010 14:26:12 +0000
parents 613370892df2
children c8fea332d5d9
files h264.c h264.h
diffstat 2 files changed, 7 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/h264.c	Thu Feb 25 14:02:39 2010 +0000
+++ b/h264.c	Thu Feb 25 14:26:12 2010 +0000
@@ -54,13 +54,10 @@
 void ff_h264_write_back_intra_pred_mode(H264Context *h){
     int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
 
-    mode[0]= h->intra4x4_pred_mode_cache[7+8*1];
-    mode[1]= h->intra4x4_pred_mode_cache[7+8*2];
-    mode[2]= h->intra4x4_pred_mode_cache[7+8*3];
-    mode[3]= h->intra4x4_pred_mode_cache[7+8*4];
-    mode[4]= h->intra4x4_pred_mode_cache[4+8*4];
-    mode[5]= h->intra4x4_pred_mode_cache[5+8*4];
-    mode[6]= h->intra4x4_pred_mode_cache[6+8*4];
+    AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
+    mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
+    mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
+    mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
 }
 
 /**
--- a/h264.h	Thu Feb 25 14:02:39 2010 +0000
+++ b/h264.h	Thu Feb 25 14:26:12 2010 +0000
@@ -886,11 +886,7 @@
 
             if(IS_INTRA4x4(mb_type)){
                 if(IS_INTRA4x4(top_type)){
-                    int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[top_xy];
-                    h->intra4x4_pred_mode_cache[4+8*0]= mode[4];
-                    h->intra4x4_pred_mode_cache[5+8*0]= mode[5];
-                    h->intra4x4_pred_mode_cache[6+8*0]= mode[6];
-                    h->intra4x4_pred_mode_cache[7+8*0]= mode[3];
+                    AV_COPY32(h->intra4x4_pred_mode_cache+4+8*0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]);
                 }else{
                     int pred;
                     if(!(top_type & type_mask))
@@ -906,8 +902,8 @@
                 for(i=0; i<2; i++){
                     if(IS_INTRA4x4(left_type[i])){
                         int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]];
-                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[left_block[0+2*i]];
-                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[left_block[1+2*i]];
+                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]];
+                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]];
                     }else{
                         int pred;
                         if(!(left_type[i] & type_mask))