comparison vp3.c @ 11477:445659683743 libavcodec

vp3: Split out motion vectors to their own array. 1.5% faster overall decode on my penryn.
author conrad
date Sat, 13 Mar 2010 10:25:41 +0000
parents b7744b7bee4b
children 8a4984c5cacc
comparison
equal deleted inserted replaced
11476:8ef285dc7f80 11477:445659683743
46 46
47 //FIXME split things out into their own arrays 47 //FIXME split things out into their own arrays
48 typedef struct Vp3Fragment { 48 typedef struct Vp3Fragment {
49 int16_t dc; 49 int16_t dc;
50 uint8_t coding_method; 50 uint8_t coding_method;
51 int8_t motion_x;
52 int8_t motion_y;
53 uint8_t qpi; 51 uint8_t qpi;
54 } Vp3Fragment; 52 } Vp3Fragment;
55 53
56 #define SB_NOT_CODED 0 54 #define SB_NOT_CODED 0
57 #define SB_PARTIALLY_CODED 1 55 #define SB_PARTIALLY_CODED 1
163 int fragment_height[2]; 161 int fragment_height[2];
164 162
165 Vp3Fragment *all_fragments; 163 Vp3Fragment *all_fragments;
166 int fragment_start[3]; 164 int fragment_start[3];
167 int data_offset[3]; 165 int data_offset[3];
166
167 int8_t (*motion_val[2])[2];
168 168
169 ScanTable scantable; 169 ScanTable scantable;
170 170
171 /* tables */ 171 /* tables */
172 uint16_t coded_dc_scale_factor[64]; 172 uint16_t coded_dc_scale_factor[64];
622 int last_motion_y = 0; 622 int last_motion_y = 0;
623 int prior_last_motion_x = 0; 623 int prior_last_motion_x = 0;
624 int prior_last_motion_y = 0; 624 int prior_last_motion_y = 0;
625 int current_macroblock; 625 int current_macroblock;
626 int current_fragment; 626 int current_fragment;
627 Vp3Fragment *frag; 627 int frag;
628 628
629 if (s->keyframe) 629 if (s->keyframe)
630 return 0; 630 return 0;
631 631
632 /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */ 632 /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */
729 /* assign the motion vectors to the correct fragments */ 729 /* assign the motion vectors to the correct fragments */
730 for (k = 0; k < 4; k++) { 730 for (k = 0; k < 4; k++) {
731 current_fragment = 731 current_fragment =
732 BLOCK_Y*s->fragment_width[0] + BLOCK_X; 732 BLOCK_Y*s->fragment_width[0] + BLOCK_X;
733 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { 733 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
734 s->all_fragments[current_fragment].motion_x = motion_x[k]; 734 s->motion_val[0][current_fragment][0] = motion_x[k];
735 s->all_fragments[current_fragment].motion_y = motion_y[k]; 735 s->motion_val[0][current_fragment][1] = motion_y[k];
736 } else { 736 } else {
737 s->all_fragments[current_fragment].motion_x = motion_x[0]; 737 s->motion_val[0][current_fragment][0] = motion_x[0];
738 s->all_fragments[current_fragment].motion_y = motion_y[0]; 738 s->motion_val[0][current_fragment][1] = motion_y[0];
739 } 739 }
740 } 740 }
741
742 #define SET_CHROMA_MV(mx, my) \
743 frag[s->fragment_start[1]].motion_x = mx; \
744 frag[s->fragment_start[1]].motion_y = my; \
745 frag[s->fragment_start[2]].motion_x = mx; \
746 frag[s->fragment_start[2]].motion_y = my
747 741
748 if (s->chroma_y_shift) { 742 if (s->chroma_y_shift) {
749 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { 743 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
750 motion_x[0] = RSHIFT(motion_x[0] + motion_x[1] + motion_x[2] + motion_x[3], 2); 744 motion_x[0] = RSHIFT(motion_x[0] + motion_x[1] + motion_x[2] + motion_x[3], 2);
751 motion_y[0] = RSHIFT(motion_y[0] + motion_y[1] + motion_y[2] + motion_y[3], 2); 745 motion_y[0] = RSHIFT(motion_y[0] + motion_y[1] + motion_y[2] + motion_y[3], 2);
752 } 746 }
753 motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1); 747 motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1);
754 motion_y[0] = (motion_y[0]>>1) | (motion_y[0]&1); 748 motion_y[0] = (motion_y[0]>>1) | (motion_y[0]&1);
755 frag = s->all_fragments + mb_y*s->fragment_width[1] + mb_x; 749 frag = mb_y*s->fragment_width[1] + mb_x;
756 SET_CHROMA_MV(motion_x[0], motion_y[0]); 750 s->motion_val[1][frag][0] = motion_x[0];
751 s->motion_val[1][frag][1] = motion_y[0];
757 } else if (s->chroma_x_shift) { 752 } else if (s->chroma_x_shift) {
758 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { 753 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
759 motion_x[0] = RSHIFT(motion_x[0] + motion_x[1], 1); 754 motion_x[0] = RSHIFT(motion_x[0] + motion_x[1], 1);
760 motion_y[0] = RSHIFT(motion_y[0] + motion_y[1], 1); 755 motion_y[0] = RSHIFT(motion_y[0] + motion_y[1], 1);
761 motion_x[1] = RSHIFT(motion_x[2] + motion_x[3], 1); 756 motion_x[1] = RSHIFT(motion_x[2] + motion_x[3], 1);
765 motion_y[1] = motion_y[0]; 760 motion_y[1] = motion_y[0];
766 } 761 }
767 motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1); 762 motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1);
768 motion_x[1] = (motion_x[1]>>1) | (motion_x[1]&1); 763 motion_x[1] = (motion_x[1]>>1) | (motion_x[1]&1);
769 764
770 frag = s->all_fragments + 2*mb_y*s->fragment_width[1] + mb_x; 765 frag = 2*mb_y*s->fragment_width[1] + mb_x;
771 for (k = 0; k < 2; k++) { 766 for (k = 0; k < 2; k++) {
772 SET_CHROMA_MV(motion_x[k], motion_y[k]); 767 s->motion_val[1][frag][0] = motion_x[k];
768 s->motion_val[1][frag][1] = motion_y[k];
773 frag += s->fragment_width[1]; 769 frag += s->fragment_width[1];
774 } 770 }
775 } else { 771 } else {
776 for (k = 0; k < 4; k++) { 772 for (k = 0; k < 4; k++) {
777 frag = s->all_fragments + BLOCK_Y*s->fragment_width[1] + BLOCK_X; 773 frag = BLOCK_Y*s->fragment_width[1] + BLOCK_X;
778 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { 774 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
779 SET_CHROMA_MV(motion_x[k], motion_y[k]); 775 s->motion_val[1][frag][0] = motion_x[k];
776 s->motion_val[1][frag][1] = motion_y[k];
780 } else { 777 } else {
781 SET_CHROMA_MV(motion_x[0], motion_y[0]); 778 s->motion_val[1][frag][0] = motion_x[0];
779 s->motion_val[1][frag][1] = motion_y[0];
782 } 780 }
783 } 781 }
784 } 782 }
785 } 783 }
786 } 784 }
1352 uint8_t * last_plane = s-> last_frame.data [plane] + s->data_offset[plane]; 1350 uint8_t * last_plane = s-> last_frame.data [plane] + s->data_offset[plane];
1353 uint8_t *golden_plane = s-> golden_frame.data [plane] + s->data_offset[plane]; 1351 uint8_t *golden_plane = s-> golden_frame.data [plane] + s->data_offset[plane];
1354 int stride = s->current_frame.linesize[plane]; 1352 int stride = s->current_frame.linesize[plane];
1355 int plane_width = s->width >> (plane && s->chroma_x_shift); 1353 int plane_width = s->width >> (plane && s->chroma_x_shift);
1356 int plane_height = s->height >> (plane && s->chroma_y_shift); 1354 int plane_height = s->height >> (plane && s->chroma_y_shift);
1355 int8_t (*motion_val)[2] = s->motion_val[!!plane];
1357 1356
1358 int sb_x, sb_y = slice << (!plane && s->chroma_y_shift); 1357 int sb_x, sb_y = slice << (!plane && s->chroma_y_shift);
1359 int slice_height = sb_y + 1 + (!plane && s->chroma_y_shift); 1358 int slice_height = sb_y + 1 + (!plane && s->chroma_y_shift);
1360 int slice_width = plane ? s->c_superblock_width : s->y_superblock_width; 1359 int slice_width = plane ? s->c_superblock_width : s->y_superblock_width;
1361 1360
1406 /* sort out the motion vector if this fragment is coded 1405 /* sort out the motion vector if this fragment is coded
1407 * using a motion vector method */ 1406 * using a motion vector method */
1408 if ((s->all_fragments[i].coding_method > MODE_INTRA) && 1407 if ((s->all_fragments[i].coding_method > MODE_INTRA) &&
1409 (s->all_fragments[i].coding_method != MODE_USING_GOLDEN)) { 1408 (s->all_fragments[i].coding_method != MODE_USING_GOLDEN)) {
1410 int src_x, src_y; 1409 int src_x, src_y;
1411 motion_x = s->all_fragments[i].motion_x; 1410 motion_x = motion_val[y*fragment_width + x][0];
1412 motion_y = s->all_fragments[i].motion_y; 1411 motion_y = motion_val[y*fragment_width + x][1];
1413 1412
1414 src_x= (motion_x>>1) + 8*x; 1413 src_x= (motion_x>>1) + 8*x;
1415 src_y= (motion_y>>1) + 8*y; 1414 src_y= (motion_y>>1) + 8*y;
1416 1415
1417 motion_halfpel_index = motion_x & 0x01; 1416 motion_halfpel_index = motion_x & 0x01;
1566 s->fragment_start[2] = y_fragment_count + c_fragment_count; 1565 s->fragment_start[2] = y_fragment_count + c_fragment_count;
1567 1566
1568 s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment)); 1567 s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment));
1569 s->coded_fragment_list[0] = av_malloc(s->fragment_count * sizeof(int)); 1568 s->coded_fragment_list[0] = av_malloc(s->fragment_count * sizeof(int));
1570 s->dct_tokens_base = av_malloc(64*s->fragment_count * sizeof(*s->dct_tokens_base)); 1569 s->dct_tokens_base = av_malloc(64*s->fragment_count * sizeof(*s->dct_tokens_base));
1570 s->motion_val[0] = av_malloc(y_fragment_count * sizeof(*s->motion_val[0]));
1571 s->motion_val[1] = av_malloc(c_fragment_count * sizeof(*s->motion_val[1]));
1572
1571 if (!s->superblock_coding || !s->all_fragments || !s->dct_tokens_base || 1573 if (!s->superblock_coding || !s->all_fragments || !s->dct_tokens_base ||
1572 !s->coded_fragment_list[0]) { 1574 !s->coded_fragment_list[0] || !s->motion_val[0] || !s->motion_val[1]) {
1573 vp3_decode_end(avctx); 1575 vp3_decode_end(avctx);
1574 return -1; 1576 return -1;
1575 } 1577 }
1576 1578
1577 if (!s->theora_tables) 1579 if (!s->theora_tables)
1872 av_free(s->all_fragments); 1874 av_free(s->all_fragments);
1873 av_free(s->coded_fragment_list[0]); 1875 av_free(s->coded_fragment_list[0]);
1874 av_free(s->dct_tokens_base); 1876 av_free(s->dct_tokens_base);
1875 av_free(s->superblock_fragments); 1877 av_free(s->superblock_fragments);
1876 av_free(s->macroblock_coding); 1878 av_free(s->macroblock_coding);
1879 av_free(s->motion_val[0]);
1880 av_free(s->motion_val[1]);
1877 1881
1878 for (i = 0; i < 16; i++) { 1882 for (i = 0; i < 16; i++) {
1879 free_vlc(&s->dc_vlc[i]); 1883 free_vlc(&s->dc_vlc[i]);
1880 free_vlc(&s->ac_vlc_1[i]); 1884 free_vlc(&s->ac_vlc_1[i]);
1881 free_vlc(&s->ac_vlc_2[i]); 1885 free_vlc(&s->ac_vlc_2[i]);