comparison xvmcvideo.c @ 1580:628bf341e099 libavcodec

XvMC speedup by removing one memcpy and doing MB packing
author iive
date Mon, 27 Oct 2003 23:22:43 +0000
parents d107c545d745
children 024752284c25
comparison
equal deleted inserted replaced
1579:039cb2296de2 1580:628bf341e099
38 //XvMC emulation could be performed 38 //XvMC emulation could be performed
39 39
40 #include "xvmc_render.h" 40 #include "xvmc_render.h"
41 41
42 //#include "xvmc_debug.h" 42 //#include "xvmc_debug.h"
43
44 //set s->block
45 inline void XVMC_init_block(MpegEncContext *s){
46 xvmc_render_state_t * render;
47 render = (xvmc_render_state_t*)s->current_picture.data[2];
48 assert(render != NULL);
49 if( (render == NULL) || (render->magic != MP_XVMC_RENDER_MAGIC) ){
50 assert(0);
51 return;//make sure that this is render packet
52 }
53 s->block =(DCTELEM *)(render->data_blocks+(render->next_free_data_block_num)*64);
54 }
55
56 void XVMC_pack_pblocks(MpegEncContext *s, int cbp){
57 int i,j;
58 #define numblocks 6
59
60 j=0;
61 for(i=0;i<numblocks;i++){
62 if(cbp & (1<<(numblocks-1-i)) ){
63 s->pblocks[i] = (short *)(&s->block[(j++)]);
64 }else{
65 s->pblocks[i] = NULL;
66 }
67 // printf("s->pblocks[%d]=%p ,s->block=%p cbp=%d\n",i,s->pblocks[i],s->block,cbp);
68 }
69 }
43 70
44 static int calc_cbp(MpegEncContext *s, int blocknum){ 71 static int calc_cbp(MpegEncContext *s, int blocknum){
45 /* compute cbp */ 72 /* compute cbp */
46 // for I420 bit_offset=5 73 // for I420 bit_offset=5
47 int i,cbp = 0; 74 int i,cbp = 0;
108 // printf("xvmcvideo.c: rendering %d left blocks after last slice!!!\n",render->filled_mv_blocks_num ); 135 // printf("xvmcvideo.c: rendering %d left blocks after last slice!!!\n",render->filled_mv_blocks_num );
109 ff_draw_horiz_band(s,0,0); 136 ff_draw_horiz_band(s,0,0);
110 } 137 }
111 } 138 }
112 139
113 void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]){ 140 void XVMC_decode_mb(MpegEncContext *s){
114 XvMCMacroBlock * mv_block; 141 XvMCMacroBlock * mv_block;
115 xvmc_render_state_t * render; 142 xvmc_render_state_t * render;
116 int i,cbp,blocks_per_mb; 143 int i,cbp,blocks_per_mb;
117 144
118 const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; 145 const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
240 break; 267 break;
241 } 268 }
242 */ 269 */
243 if(s->flags & CODEC_FLAG_GRAY){ 270 if(s->flags & CODEC_FLAG_GRAY){
244 if(s->mb_intra){//intra frames are always full chroma block 271 if(s->mb_intra){//intra frames are always full chroma block
245 memset(block[4],0,sizeof(short)*8*8);//so we need to clear them 272 for(i=4; i<blocks_per_mb; i++){
246 memset(block[5],0,sizeof(short)*8*8); 273 memset(s->pblocks[i],0,sizeof(short)*8*8);//so we need to clear them
247 if(!render->unsigned_intra) 274 if(!render->unsigned_intra)
248 block[4][0] = block[5][0] = 1<<10; 275 s->pblocks[i][0] = 1<<10;
249 } 276 }
250 else 277 }else
251 blocks_per_mb = 4;//Luminance blocks only 278 blocks_per_mb = 4;//Luminance blocks only
252 }; 279 }
253 cbp = calc_cbp(s,blocks_per_mb); 280 cbp = calc_cbp(s,blocks_per_mb);
254 mv_block->coded_block_pattern = cbp; 281 mv_block->coded_block_pattern = cbp;
255 if(cbp == 0) 282 if(cbp == 0)
256 mv_block->macroblock_type &= ~XVMC_MB_TYPE_PATTERN; 283 mv_block->macroblock_type &= ~XVMC_MB_TYPE_PATTERN;
257 284
258 for(i=0; i<blocks_per_mb; i++){ 285 for(i=0; i<blocks_per_mb; i++){
259 if(s->block_last_index[i] >= 0){ 286 if(s->block_last_index[i] >= 0){
260 // i do not have unsigned_intra MOCO to test, hope it is OK 287 // i do not have unsigned_intra MOCO to test, hope it is OK
261 if( (s->mb_intra) && ( render->idct || (!render->idct && !render->unsigned_intra)) ) 288 if( (s->mb_intra) && ( render->idct || (!render->idct && !render->unsigned_intra)) )
262 block[i][0]-=1<<10; 289 s->pblocks[i][0]-=1<<10;
263 if(!render->idct){ 290 if(!render->idct){
264 s->dsp.idct(block[i]); 291 s->dsp.idct(s->pblocks[i]);
265 //!!TODO!clip!!! 292 //!!TODO!clip!!!
266 } 293 }
267 //TODO:avoid block copy by modifying s->block pointer 294 //copy blocks only if the codec doesn't support pblocks reordering
268 memcpy(&render->data_blocks[(render->next_free_data_block_num++)*64], 295 if(s->avctx->xvmc_acceleration == 1){
269 block[i],sizeof(short)*8*8); 296 memcpy(&render->data_blocks[(render->next_free_data_block_num)*64],
297 s->pblocks[i],sizeof(short)*8*8);
298 }else{
299 /* if(s->pblocks[i] != &render->data_blocks[
300 (render->next_free_data_block_num)*64]){
301 printf("ERROR mb(%d,%d) s->pblocks[i]=%p data_block[]=%p\n",
302 s->mb_x,s->mb_y, s->pblocks[i],
303 &render->data_blocks[(render->next_free_data_block_num)*64]);
304 }*/
305 }
306 render->next_free_data_block_num++;
270 } 307 }
271 } 308 }
272 render->filled_mv_blocks_num++; 309 render->filled_mv_blocks_num++;
273 310
274 assert(render->filled_mv_blocks_num <= render->total_number_of_mv_blocks); 311 assert(render->filled_mv_blocks_num <= render->total_number_of_mv_blocks);