comparison dv.c @ 8011:3ec8f8660152 libavcodec

unrolling the loops as per Michael's suggestion
author romansh
date Tue, 07 Oct 2008 15:52:20 +0000
parents 4b19d9cbc75b
children 07d8986fbea7
comparison
legend: equal | deleted | inserted | replaced
left column: 8010:4b19d9cbc75b (parent) | right column: 8011:3ec8f8660152 (this changeset)
365 const uint8_t *buf_ptr1, 365 const uint8_t *buf_ptr1,
366 const uint16_t *mb_pos_ptr) 366 const uint16_t *mb_pos_ptr)
367 { 367 {
368 int quant, dc, dct_mode, class1, j; 368 int quant, dc, dct_mode, class1, j;
369 int mb_index, mb_x, mb_y, v, last_index; 369 int mb_index, mb_x, mb_y, v, last_index;
370 int y_stride, i; 370 int y_stride, linesize;
371 DCTELEM *block, *block1; 371 DCTELEM *block, *block1;
372 int c_offset; 372 int c_offset;
373 uint8_t *y_ptr; 373 uint8_t *y_ptr;
374 const uint8_t *buf_ptr; 374 const uint8_t *buf_ptr;
375 PutBitContext pb, vs_pb; 375 PutBitContext pb, vs_pb;
500 500
501 /* idct_put'ting luminance */ 501 /* idct_put'ting luminance */
502 if ((s->sys->pix_fmt == PIX_FMT_YUV420P) || 502 if ((s->sys->pix_fmt == PIX_FMT_YUV420P) ||
503 (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) || 503 (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) ||
504 (s->sys->height >= 720 && mb_y != 134)) { 504 (s->sys->height >= 720 && mb_y != 134)) {
505 y_stride = (s->picture.linesize[0]<<((!is_field_mode[mb_index])*log2_blocksize)) - (2<<log2_blocksize); 505 y_stride = (s->picture.linesize[0]<<((!is_field_mode[mb_index])*log2_blocksize));
506 } else { 506 } else {
507 y_stride = 0; 507 y_stride = (2<<log2_blocksize);
508 } 508 }
509 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x)<<log2_blocksize); 509 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x)<<log2_blocksize);
510 for(j = 0; j < 2; j++, y_ptr += y_stride) { 510 linesize = s->picture.linesize[0]<<is_field_mode[mb_index];
511 for (i=0; i<2; i++, block += 64, mb++, y_ptr += (1<<log2_blocksize)) 511 mb[0] .idct_put(y_ptr , linesize, block + 0*64);
512 if (s->sys->pix_fmt == PIX_FMT_YUV422P && s->sys->width == 720 && i) 512 if (s->sys->video_stype == 4) { /* SD 422 */
513 y_ptr -= (1<<log2_blocksize); 513 mb[2].idct_put(y_ptr + (1<<log2_blocksize) , linesize, block + 2*64);
514 else 514 } else {
515 mb->idct_put(y_ptr, s->picture.linesize[0]<<is_field_mode[mb_index], block); 515 mb[1].idct_put(y_ptr + (1<<log2_blocksize) , linesize, block + 1*64);
516 } 516 mb[2].idct_put(y_ptr + y_stride, linesize, block + 2*64);
517 mb[3].idct_put(y_ptr + (1<<log2_blocksize) + y_stride, linesize, block + 3*64);
518 }
519 mb += 4;
520 block += 4*64;
517 521
518 /* idct_put'ting chrominance */ 522 /* idct_put'ting chrominance */
519 c_offset = (((mb_y>>(s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] + 523 c_offset = (((mb_y>>(s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] +
520 (mb_x>>((s->sys->pix_fmt == PIX_FMT_YUV411P)?2:1)))<<log2_blocksize); 524 (mb_x>>((s->sys->pix_fmt == PIX_FMT_YUV411P)?2:1)))<<log2_blocksize);
521 for(j=2; j; j--) { 525 for(j=2; j; j--) {
536 } 540 }
537 block += 64; mb++; 541 block += 64; mb++;
538 } else { 542 } else {
539 y_stride = (mb_y == 134) ? (1<<log2_blocksize) : 543 y_stride = (mb_y == 134) ? (1<<log2_blocksize) :
540 s->picture.linesize[j]<<((!is_field_mode[mb_index])*log2_blocksize); 544 s->picture.linesize[j]<<((!is_field_mode[mb_index])*log2_blocksize);
541 for (i=0; i<(1<<(s->sys->bpm==8)); i++, block += 64, mb++, c_ptr += y_stride) 545 linesize = s->picture.linesize[j]<<is_field_mode[mb_index];
542 mb->idct_put(c_ptr, s->picture.linesize[j]<<is_field_mode[mb_index], block); 546 (mb++)-> idct_put(c_ptr , linesize, block); block+=64;
547 if (s->sys->bpm == 8) {
548 (mb++)->idct_put(c_ptr + y_stride, linesize, block); block+=64;
549 }
543 } 550 }
544 } 551 }
545 } 552 }
546 } 553 }
547 554