changeset 1580:628bf341e099 libavcodec

XvMC speedup by removing one memcpy and doing MB packing
author iive
date Mon, 27 Oct 2003 23:22:43 +0000
parents 039cb2296de2
children d2fc92d02bf7
files mpeg12.c mpegvideo.c mpegvideo.h xvmcvideo.c
diffstat 4 files changed, 105 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/mpeg12.c	Mon Oct 27 14:37:30 2003 +0000
+++ b/mpeg12.c	Mon Oct 27 23:22:43 2003 +0000
@@ -72,6 +72,8 @@
 #ifdef HAVE_XVMC
 extern int XVMC_field_start(MpegEncContext *s, AVCodecContext *avctx);
 extern int XVMC_field_end(MpegEncContext *s);
+extern void XVMC_pack_pblocks(MpegEncContext *s,int cbp);
+extern void XVMC_init_block(MpegEncContext *s);//set s->block
 #endif
 
 #ifdef CONFIG_ENCODERS
@@ -1083,15 +1085,24 @@
         }else
             memset(s->last_mv, 0, sizeof(s->last_mv)); /* reset mv prediction */
         s->mb_intra = 1;
+#ifdef HAVE_XVMC
+        //with xvmc_acceleration == 1 the blocks are memcpy'd in xvmcvideo instead
+        if(s->avctx->xvmc_acceleration > 1){
+            XVMC_pack_pblocks(s,-1);//inter are always full blocks
+            if(s->swap_uv){
+                exchange_uv(s);
+            }
+        }
+#endif
 
         if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
             for(i=0;i<6;i++) {
-                if (mpeg2_decode_block_intra(s, block[i], i) < 0)
+                if (mpeg2_decode_block_intra(s, s->pblocks[i], i) < 0)
                     return -1;
             }
         } else {
             for(i=0;i<6;i++) {
-                if (mpeg1_decode_block_intra(s, block[i], i) < 0)
+                if (mpeg1_decode_block_intra(s, s->pblocks[i], i) < 0)
                     return -1;
             }
         }
@@ -1262,10 +1273,20 @@
             }
             cbp++;
 
+#ifdef HAVE_XVMC
+            //on 1 we memcpy blocks in xvmcvideo
+            if(s->avctx->xvmc_acceleration > 1){
+                XVMC_pack_pblocks(s,cbp);
+                if(s->swap_uv){
+                    exchange_uv(s);
+                }
+            }    
+#endif
+
             if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
                 for(i=0;i<6;i++) {
                     if (cbp & 32) {
-                        if (mpeg2_decode_block_non_intra(s, block[i], i) < 0)
+                        if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0)
                             return -1;
                     } else {
                         s->block_last_index[i] = -1;
@@ -1275,7 +1296,7 @@
             } else {
                 for(i=0;i<6;i++) {
                     if (cbp & 32) {
-                        if (mpeg1_decode_block_inter(s, block[i], i) < 0)
+                        if (mpeg1_decode_block_inter(s, s->pblocks[i], i) < 0)
                             return -1;
                     } else {
                         s->block_last_index[i] = -1;
@@ -1960,10 +1981,12 @@
     }
 }
 
-static void exchange_uv(AVFrame *f){
-    uint8_t *t= f->data[1];
-    f->data[1]= f->data[2];
-    f->data[2]= t;
+static void exchange_uv(MpegEncContext *s){
+    /* swap the pointers of blocks 4 and 5 (used for UV-swapped streams) */
+    short *saved = s->pblocks[4];
+
+    s->pblocks[4] = s->pblocks[5];
+    s->pblocks[5] = saved;
+}
 
 #define DECODE_SLICE_FATAL_ERROR -2
@@ -2093,6 +2116,12 @@
     ff_init_block_index(s);
 
     for(;;) {
+#ifdef HAVE_XVMC
+        //with xvmc_acceleration == 1 the blocks are memcpy'd in xvmcvideo instead
+        if(s->avctx->xvmc_acceleration > 1)
+            XVMC_init_block(s);//set s->block
+#endif
+
 	s->dsp.clear_blocks(s->block[0]);
 
         ret = mpeg_decode_mb(s, s->block);
@@ -2133,14 +2162,9 @@
         MPV_decode_mb(s, s->block);
         
         if (++s->mb_x >= s->mb_width) {
-            if(s->avctx->codec_tag == ff_get_fourcc("VCR2"))
-                exchange_uv((AVFrame*)s->current_picture_ptr);
 
             ff_draw_horiz_band(s, 16*s->mb_y, 16);
 
-            if(s->avctx->codec_tag == ff_get_fourcc("VCR2"))
-                exchange_uv((AVFrame*)s->current_picture_ptr);
-
             s->mb_x = 0;
             s->mb_y++;
 
@@ -2233,8 +2257,6 @@
                  ff_print_debug_info(s, s->last_picture_ptr);
             }
         }
-        if(s->avctx->codec_tag == ff_get_fourcc("VCR2"))
-            exchange_uv(pict);
 
         return 1;
     } else {
@@ -2294,11 +2316,13 @@
         //get_format() or set_video(width,height,aspect,pix_fmt);
         //until then pix_fmt may be changed right after codec init
         if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT )
-            avctx->idct_algo = FF_IDCT_SIMPLE;
+            if( avctx->idct_algo == FF_IDCT_AUTO )
+                avctx->idct_algo = FF_IDCT_SIMPLE;
 
         if (MPV_common_init(s) < 0)
             return -1;
         s1->mpeg_enc_ctx_allocated = 1;
+        s->swap_uv = 0;//just in case vcr2 and mpeg2 stream have been concatinated
     }
 
     skip_bits(&s->gb, 10); /* vbv_buffer_size */
@@ -2378,10 +2402,13 @@
     //get_format() or set_video(width,height,aspect,pix_fmt);
     //until then pix_fmt may be changed right after codec init
     if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT )
-        avctx->idct_algo = FF_IDCT_SIMPLE;
+        if( avctx->idct_algo == FF_IDCT_AUTO )
+            avctx->idct_algo = FF_IDCT_SIMPLE;
     
     if (MPV_common_init(s) < 0)
         return -1;
+    exchange_uv(s);//common init reset pblocks, so we swap them here
+    s->swap_uv = 1;// in case of xvmc we need to swap uv for each MB 
     s1->mpeg_enc_ctx_allocated = 1;
 
     for(i=0;i<64;i++) {
@@ -2634,14 +2661,14 @@
 
     if( !(avctx->slice_flags & SLICE_FLAG_CODED_ORDER) )
         return -1;
-    if( !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD) )
+    if( !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD) ){
         dprintf("mpeg12.c: XvMC decoder will work better if SLICE_FLAG_ALLOW_FIELD is set\n");
-
+    }
     mpeg_decode_init(avctx);
     s = avctx->priv_data;
 
     avctx->pix_fmt = PIX_FMT_XVMC_MPEG2_IDCT;
-    avctx->xvmc_acceleration = 1;
+    avctx->xvmc_acceleration = 2;//2 - the blocks are packed!
 
     return 0;
 }
--- a/mpegvideo.c	Mon Oct 27 14:37:30 2003 +0000
+++ b/mpegvideo.c	Mon Oct 27 23:22:43 2003 +0000
@@ -56,7 +56,7 @@
 #ifdef HAVE_XVMC
 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
 extern void XVMC_field_end(MpegEncContext *s);
-extern void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
+extern void XVMC_decode_mb(MpegEncContext *s);
 #endif
 
 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
@@ -519,6 +519,10 @@
     
     s->block= s->blocks[0];
 
+    for(i=0;i<12;i++){
+        s->pblocks[i] = (short *)(&s->block[i]);
+    }
+
     s->parse_context.state= -1;
 
     s->context_initialized = 1;
@@ -2485,7 +2489,7 @@
     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
 #ifdef HAVE_XVMC
     if(s->avctx->xvmc_acceleration){
-        XVMC_decode_mb(s,block);
+        XVMC_decode_mb(s);//xvmc uses pblocks
         return;
     }
 #endif
--- a/mpegvideo.h	Mon Oct 27 14:37:30 2003 +0000
+++ b/mpegvideo.h	Mon Oct 27 23:22:43 2003 +0000
@@ -655,6 +655,8 @@
     int rtp_payload_size;
     void (*rtp_callback)(void *data, int size, int packet_number);
     uint8_t *ptr_lastgob;
+    int swap_uv;//vcr2 codec is mpeg2 varint with UV swaped
+    short * pblocks[12];
     
     DCTELEM (*block)[64]; ///< points to one of the following blocks 
     DCTELEM (*blocks)[6][64]; // for HQ mode we need to keep the best block
--- a/xvmcvideo.c	Mon Oct 27 14:37:30 2003 +0000
+++ b/xvmcvideo.c	Mon Oct 27 23:22:43 2003 +0000
@@ -41,6 +41,33 @@
 
 //#include "xvmc_debug.h"
 
+/* Point s->block at the next free data block of the current render packet. */
+void XVMC_init_block(MpegEncContext *s){
+    xvmc_render_state_t *render = (xvmc_render_state_t*)s->current_picture.data[2];
+
+    assert(render != NULL);
+    if( (render == NULL) || (render->magic != MP_XVMC_RENDER_MAGIC) ){
+        assert(0);
+        return;//make sure that this is render packet
+    }
+    s->block = (DCTELEM (*)[64])(render->data_blocks + render->next_free_data_block_num*64);
+}
+
+/* Assign consecutive s->block slots to the blocks flagged in cbp; others get NULL. */
+void XVMC_pack_pblocks(MpegEncContext *s, int cbp){
+#define numblocks 6
+    int blk;
+    int packed = 0; /* index of the next unused slot in s->block */
+
+    for(blk = 0; blk < numblocks; blk++){
+        /* bit (numblocks-1) of cbp belongs to block 0, bit 0 to the last block */
+        const int coded = cbp & (1 << (numblocks-1-blk));
+        s->pblocks[blk] = coded ? (short *)(&s->block[packed]) : NULL;
+        if(coded)
+            packed++;
+    }
+}
+
 static int calc_cbp(MpegEncContext *s, int blocknum){
 /* compute cbp */
 // for I420 bit_offset=5
@@ -110,7 +137,7 @@
     }
 }
 
-void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]){
+void XVMC_decode_mb(MpegEncContext *s){
 XvMCMacroBlock * mv_block;
 xvmc_render_state_t * render;
 int i,cbp,blocks_per_mb;
@@ -242,14 +269,14 @@
 */
     if(s->flags & CODEC_FLAG_GRAY){
         if(s->mb_intra){//intra frames are alwasy full chroma block
-            memset(block[4],0,sizeof(short)*8*8);//so we need to clear them
-            memset(block[5],0,sizeof(short)*8*8);
-            if(!render->unsigned_intra)
-                block[4][0] = block[5][0] = 1<<10;
-        }
-        else
+            for(i=4; i<blocks_per_mb; i++){
+                memset(s->pblocks[i],0,sizeof(short)*8*8);//so we need to clear them
+                if(!render->unsigned_intra)
+                    s->pblocks[i][0] = 1<<10;
+            }
+        }else
             blocks_per_mb = 4;//Luminance blocks only
-    };
+    }
     cbp = calc_cbp(s,blocks_per_mb);
     mv_block->coded_block_pattern = cbp;
     if(cbp == 0)
@@ -259,14 +286,24 @@
         if(s->block_last_index[i] >= 0){
             // i do not have unsigned_intra MOCO to test, hope it is OK
             if( (s->mb_intra) && ( render->idct || (!render->idct && !render->unsigned_intra)) )
-                block[i][0]-=1<<10;
+                s->pblocks[i][0]-=1<<10;
             if(!render->idct){
-                s->dsp.idct(block[i]);
+                s->dsp.idct(s->pblocks[i]);
                 //!!TODO!clip!!!
             }
-//TODO:avoid block copy by modifying s->block pointer
-            memcpy(&render->data_blocks[(render->next_free_data_block_num++)*64],
-                    block[i],sizeof(short)*8*8);
+//copy blocks only if the codec doesn't support pblocks reordering
+            if(s->avctx->xvmc_acceleration == 1){
+                memcpy(&render->data_blocks[(render->next_free_data_block_num)*64],
+                        s->pblocks[i],sizeof(short)*8*8);
+            }else{
+/*              if(s->pblocks[i] != &render->data_blocks[
+                        (render->next_free_data_block_num)*64]){
+                   printf("ERROR mb(%d,%d) s->pblocks[i]=%p data_block[]=%p\n",
+                   s->mb_x,s->mb_y, s->pblocks[i], 
+                   &render->data_blocks[(render->next_free_data_block_num)*64]);
+                }*/
+            }
+            render->next_free_data_block_num++;
         }
     }
     render->filled_mv_blocks_num++;