diff xvmcvideo.c @ 1580:628bf341e099 libavcodec

XvMC speedup by removing one memcpy and doing MB packing
author iive
date Mon, 27 Oct 2003 23:22:43 +0000
parents d107c545d745
children 024752284c25
line wrap: on
line diff
--- a/xvmcvideo.c	Mon Oct 27 14:37:30 2003 +0000
+++ b/xvmcvideo.c	Mon Oct 27 23:22:43 2003 +0000
@@ -41,6 +41,33 @@
 
 //#include "xvmc_debug.h"
 
+//set s->block
+inline void XVMC_init_block(MpegEncContext *s){
+xvmc_render_state_t * render;
+    render = (xvmc_render_state_t*)s->current_picture.data[2];
+    assert(render != NULL);
+    if( (render == NULL) || (render->magic != MP_XVMC_RENDER_MAGIC) ){
+        assert(0);
+        return;//make sure that this is render packet
+    }
+    s->block =(DCTELEM *)(render->data_blocks+(render->next_free_data_block_num)*64);
+}
+
+void XVMC_pack_pblocks(MpegEncContext *s, int cbp){
+int i,j;
+#define numblocks 6
+
+    j=0;
+    for(i=0;i<numblocks;i++){
+        if(cbp & (1<<(numblocks-1-i)) ){
+           s->pblocks[i] = (short *)(&s->block[(j++)]);
+        }else{
+           s->pblocks[i] = NULL;
+        }
+//        printf("s->pblocks[%d]=%p ,s->block=%p cbp=%d\n",i,s->pblocks[i],s->block,cbp);
+    }
+}
+
 static int calc_cbp(MpegEncContext *s, int blocknum){
 /* compute cbp */
 // for I420 bit_offset=5
@@ -110,7 +137,7 @@
     }
 }
 
-void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]){
+void XVMC_decode_mb(MpegEncContext *s){
 XvMCMacroBlock * mv_block;
 xvmc_render_state_t * render;
 int i,cbp,blocks_per_mb;
@@ -242,14 +269,14 @@
 */
     if(s->flags & CODEC_FLAG_GRAY){
         if(s->mb_intra){//intra frames are alwasy full chroma block
-            memset(block[4],0,sizeof(short)*8*8);//so we need to clear them
-            memset(block[5],0,sizeof(short)*8*8);
-            if(!render->unsigned_intra)
-                block[4][0] = block[5][0] = 1<<10;
-        }
-        else
+            for(i=4; i<blocks_per_mb; i++){
+                memset(s->pblocks[i],0,sizeof(short)*8*8);//so we need to clear them
+                if(!render->unsigned_intra)
+                    s->pblocks[i][0] = 1<<10;
+            }
+        }else
             blocks_per_mb = 4;//Luminance blocks only
-    };
+    }
     cbp = calc_cbp(s,blocks_per_mb);
     mv_block->coded_block_pattern = cbp;
     if(cbp == 0)
@@ -259,14 +286,24 @@
         if(s->block_last_index[i] >= 0){
             // i do not have unsigned_intra MOCO to test, hope it is OK
             if( (s->mb_intra) && ( render->idct || (!render->idct && !render->unsigned_intra)) )
-                block[i][0]-=1<<10;
+                s->pblocks[i][0]-=1<<10;
             if(!render->idct){
-                s->dsp.idct(block[i]);
+                s->dsp.idct(s->pblocks[i]);
                 //!!TODO!clip!!!
             }
-//TODO:avoid block copy by modifying s->block pointer
-            memcpy(&render->data_blocks[(render->next_free_data_block_num++)*64],
-                    block[i],sizeof(short)*8*8);
+//copy blocks only if the codec doesn't support pblocks reordering
+            if(s->avctx->xvmc_acceleration == 1){
+                memcpy(&render->data_blocks[(render->next_free_data_block_num)*64],
+                        s->pblocks[i],sizeof(short)*8*8);
+            }else{
+/*              if(s->pblocks[i] != &render->data_blocks[
+                        (render->next_free_data_block_num)*64]){
+                   printf("ERROR mb(%d,%d) s->pblocks[i]=%p data_block[]=%p\n",
+                   s->mb_x,s->mb_y, s->pblocks[i], 
+                   &render->data_blocks[(render->next_free_data_block_num)*64]);
+                }*/
+            }
+            render->next_free_data_block_num++;
         }
     }
     render->filled_mv_blocks_num++;