Mercurial > libavcodec.hg
comparison vp3.c @ 11477:445659683743 libavcodec
vp3: Split out motion vectors to their own array
1.5% faster overall decode on my penryn
author | conrad |
---|---|
date | Sat, 13 Mar 2010 10:25:41 +0000 |
parents | b7744b7bee4b |
children | 8a4984c5cacc |
comparison
legend: equal | deleted | inserted | replaced
11476:8ef285dc7f80 | 11477:445659683743 |
---|---|
46 | 46 |
47 //FIXME split things out into their own arrays | 47 //FIXME split things out into their own arrays |
48 typedef struct Vp3Fragment { | 48 typedef struct Vp3Fragment { |
49 int16_t dc; | 49 int16_t dc; |
50 uint8_t coding_method; | 50 uint8_t coding_method; |
51 int8_t motion_x; | |
52 int8_t motion_y; | |
53 uint8_t qpi; | 51 uint8_t qpi; |
54 } Vp3Fragment; | 52 } Vp3Fragment; |
55 | 53 |
56 #define SB_NOT_CODED 0 | 54 #define SB_NOT_CODED 0 |
57 #define SB_PARTIALLY_CODED 1 | 55 #define SB_PARTIALLY_CODED 1 |
163 int fragment_height[2]; | 161 int fragment_height[2]; |
164 | 162 |
165 Vp3Fragment *all_fragments; | 163 Vp3Fragment *all_fragments; |
166 int fragment_start[3]; | 164 int fragment_start[3]; |
167 int data_offset[3]; | 165 int data_offset[3]; |
166 | |
167 int8_t (*motion_val[2])[2]; | |
168 | 168 |
169 ScanTable scantable; | 169 ScanTable scantable; |
170 | 170 |
171 /* tables */ | 171 /* tables */ |
172 uint16_t coded_dc_scale_factor[64]; | 172 uint16_t coded_dc_scale_factor[64]; |
622 int last_motion_y = 0; | 622 int last_motion_y = 0; |
623 int prior_last_motion_x = 0; | 623 int prior_last_motion_x = 0; |
624 int prior_last_motion_y = 0; | 624 int prior_last_motion_y = 0; |
625 int current_macroblock; | 625 int current_macroblock; |
626 int current_fragment; | 626 int current_fragment; |
627 Vp3Fragment *frag; | 627 int frag; |
628 | 628 |
629 if (s->keyframe) | 629 if (s->keyframe) |
630 return 0; | 630 return 0; |
631 | 631 |
632 /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */ | 632 /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */ |
729 /* assign the motion vectors to the correct fragments */ | 729 /* assign the motion vectors to the correct fragments */ |
730 for (k = 0; k < 4; k++) { | 730 for (k = 0; k < 4; k++) { |
731 current_fragment = | 731 current_fragment = |
732 BLOCK_Y*s->fragment_width[0] + BLOCK_X; | 732 BLOCK_Y*s->fragment_width[0] + BLOCK_X; |
733 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { | 733 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { |
734 s->all_fragments[current_fragment].motion_x = motion_x[k]; | 734 s->motion_val[0][current_fragment][0] = motion_x[k]; |
735 s->all_fragments[current_fragment].motion_y = motion_y[k]; | 735 s->motion_val[0][current_fragment][1] = motion_y[k]; |
736 } else { | 736 } else { |
737 s->all_fragments[current_fragment].motion_x = motion_x[0]; | 737 s->motion_val[0][current_fragment][0] = motion_x[0]; |
738 s->all_fragments[current_fragment].motion_y = motion_y[0]; | 738 s->motion_val[0][current_fragment][1] = motion_y[0]; |
739 } | 739 } |
740 } | 740 } |
741 | |
742 #define SET_CHROMA_MV(mx, my) \ | |
743 frag[s->fragment_start[1]].motion_x = mx; \ | |
744 frag[s->fragment_start[1]].motion_y = my; \ | |
745 frag[s->fragment_start[2]].motion_x = mx; \ | |
746 frag[s->fragment_start[2]].motion_y = my | |
747 | 741 |
748 if (s->chroma_y_shift) { | 742 if (s->chroma_y_shift) { |
749 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { | 743 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { |
750 motion_x[0] = RSHIFT(motion_x[0] + motion_x[1] + motion_x[2] + motion_x[3], 2); | 744 motion_x[0] = RSHIFT(motion_x[0] + motion_x[1] + motion_x[2] + motion_x[3], 2); |
751 motion_y[0] = RSHIFT(motion_y[0] + motion_y[1] + motion_y[2] + motion_y[3], 2); | 745 motion_y[0] = RSHIFT(motion_y[0] + motion_y[1] + motion_y[2] + motion_y[3], 2); |
752 } | 746 } |
753 motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1); | 747 motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1); |
754 motion_y[0] = (motion_y[0]>>1) | (motion_y[0]&1); | 748 motion_y[0] = (motion_y[0]>>1) | (motion_y[0]&1); |
755 frag = s->all_fragments + mb_y*s->fragment_width[1] + mb_x; | 749 frag = mb_y*s->fragment_width[1] + mb_x; |
756 SET_CHROMA_MV(motion_x[0], motion_y[0]); | 750 s->motion_val[1][frag][0] = motion_x[0]; |
751 s->motion_val[1][frag][1] = motion_y[0]; | |
757 } else if (s->chroma_x_shift) { | 752 } else if (s->chroma_x_shift) { |
758 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { | 753 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { |
759 motion_x[0] = RSHIFT(motion_x[0] + motion_x[1], 1); | 754 motion_x[0] = RSHIFT(motion_x[0] + motion_x[1], 1); |
760 motion_y[0] = RSHIFT(motion_y[0] + motion_y[1], 1); | 755 motion_y[0] = RSHIFT(motion_y[0] + motion_y[1], 1); |
761 motion_x[1] = RSHIFT(motion_x[2] + motion_x[3], 1); | 756 motion_x[1] = RSHIFT(motion_x[2] + motion_x[3], 1); |
765 motion_y[1] = motion_y[0]; | 760 motion_y[1] = motion_y[0]; |
766 } | 761 } |
767 motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1); | 762 motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1); |
768 motion_x[1] = (motion_x[1]>>1) | (motion_x[1]&1); | 763 motion_x[1] = (motion_x[1]>>1) | (motion_x[1]&1); |
769 | 764 |
770 frag = s->all_fragments + 2*mb_y*s->fragment_width[1] + mb_x; | 765 frag = 2*mb_y*s->fragment_width[1] + mb_x; |
771 for (k = 0; k < 2; k++) { | 766 for (k = 0; k < 2; k++) { |
772 SET_CHROMA_MV(motion_x[k], motion_y[k]); | 767 s->motion_val[1][frag][0] = motion_x[k]; |
768 s->motion_val[1][frag][1] = motion_y[k]; | |
773 frag += s->fragment_width[1]; | 769 frag += s->fragment_width[1]; |
774 } | 770 } |
775 } else { | 771 } else { |
776 for (k = 0; k < 4; k++) { | 772 for (k = 0; k < 4; k++) { |
777 frag = s->all_fragments + BLOCK_Y*s->fragment_width[1] + BLOCK_X; | 773 frag = BLOCK_Y*s->fragment_width[1] + BLOCK_X; |
778 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { | 774 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { |
779 SET_CHROMA_MV(motion_x[k], motion_y[k]); | 775 s->motion_val[1][frag][0] = motion_x[k]; |
776 s->motion_val[1][frag][1] = motion_y[k]; | |
780 } else { | 777 } else { |
781 SET_CHROMA_MV(motion_x[0], motion_y[0]); | 778 s->motion_val[1][frag][0] = motion_x[0]; |
779 s->motion_val[1][frag][1] = motion_y[0]; | |
782 } | 780 } |
783 } | 781 } |
784 } | 782 } |
785 } | 783 } |
786 } | 784 } |
1352 uint8_t * last_plane = s-> last_frame.data [plane] + s->data_offset[plane]; | 1350 uint8_t * last_plane = s-> last_frame.data [plane] + s->data_offset[plane]; |
1353 uint8_t *golden_plane = s-> golden_frame.data [plane] + s->data_offset[plane]; | 1351 uint8_t *golden_plane = s-> golden_frame.data [plane] + s->data_offset[plane]; |
1354 int stride = s->current_frame.linesize[plane]; | 1352 int stride = s->current_frame.linesize[plane]; |
1355 int plane_width = s->width >> (plane && s->chroma_x_shift); | 1353 int plane_width = s->width >> (plane && s->chroma_x_shift); |
1356 int plane_height = s->height >> (plane && s->chroma_y_shift); | 1354 int plane_height = s->height >> (plane && s->chroma_y_shift); |
1355 int8_t (*motion_val)[2] = s->motion_val[!!plane]; | |
1357 | 1356 |
1358 int sb_x, sb_y = slice << (!plane && s->chroma_y_shift); | 1357 int sb_x, sb_y = slice << (!plane && s->chroma_y_shift); |
1359 int slice_height = sb_y + 1 + (!plane && s->chroma_y_shift); | 1358 int slice_height = sb_y + 1 + (!plane && s->chroma_y_shift); |
1360 int slice_width = plane ? s->c_superblock_width : s->y_superblock_width; | 1359 int slice_width = plane ? s->c_superblock_width : s->y_superblock_width; |
1361 | 1360 |
1406 /* sort out the motion vector if this fragment is coded | 1405 /* sort out the motion vector if this fragment is coded |
1407 * using a motion vector method */ | 1406 * using a motion vector method */ |
1408 if ((s->all_fragments[i].coding_method > MODE_INTRA) && | 1407 if ((s->all_fragments[i].coding_method > MODE_INTRA) && |
1409 (s->all_fragments[i].coding_method != MODE_USING_GOLDEN)) { | 1408 (s->all_fragments[i].coding_method != MODE_USING_GOLDEN)) { |
1410 int src_x, src_y; | 1409 int src_x, src_y; |
1411 motion_x = s->all_fragments[i].motion_x; | 1410 motion_x = motion_val[y*fragment_width + x][0]; |
1412 motion_y = s->all_fragments[i].motion_y; | 1411 motion_y = motion_val[y*fragment_width + x][1]; |
1413 | 1412 |
1414 src_x= (motion_x>>1) + 8*x; | 1413 src_x= (motion_x>>1) + 8*x; |
1415 src_y= (motion_y>>1) + 8*y; | 1414 src_y= (motion_y>>1) + 8*y; |
1416 | 1415 |
1417 motion_halfpel_index = motion_x & 0x01; | 1416 motion_halfpel_index = motion_x & 0x01; |
1566 s->fragment_start[2] = y_fragment_count + c_fragment_count; | 1565 s->fragment_start[2] = y_fragment_count + c_fragment_count; |
1567 | 1566 |
1568 s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment)); | 1567 s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment)); |
1569 s->coded_fragment_list[0] = av_malloc(s->fragment_count * sizeof(int)); | 1568 s->coded_fragment_list[0] = av_malloc(s->fragment_count * sizeof(int)); |
1570 s->dct_tokens_base = av_malloc(64*s->fragment_count * sizeof(*s->dct_tokens_base)); | 1569 s->dct_tokens_base = av_malloc(64*s->fragment_count * sizeof(*s->dct_tokens_base)); |
1570 s->motion_val[0] = av_malloc(y_fragment_count * sizeof(*s->motion_val[0])); | |
1571 s->motion_val[1] = av_malloc(c_fragment_count * sizeof(*s->motion_val[1])); | |
1572 | |
1571 if (!s->superblock_coding || !s->all_fragments || !s->dct_tokens_base || | 1573 if (!s->superblock_coding || !s->all_fragments || !s->dct_tokens_base || |
1572 !s->coded_fragment_list[0]) { | 1574 !s->coded_fragment_list[0] || !s->motion_val[0] || !s->motion_val[1]) { |
1573 vp3_decode_end(avctx); | 1575 vp3_decode_end(avctx); |
1574 return -1; | 1576 return -1; |
1575 } | 1577 } |
1576 | 1578 |
1577 if (!s->theora_tables) | 1579 if (!s->theora_tables) |
1872 av_free(s->all_fragments); | 1874 av_free(s->all_fragments); |
1873 av_free(s->coded_fragment_list[0]); | 1875 av_free(s->coded_fragment_list[0]); |
1874 av_free(s->dct_tokens_base); | 1876 av_free(s->dct_tokens_base); |
1875 av_free(s->superblock_fragments); | 1877 av_free(s->superblock_fragments); |
1876 av_free(s->macroblock_coding); | 1878 av_free(s->macroblock_coding); |
1879 av_free(s->motion_val[0]); | |
1880 av_free(s->motion_val[1]); | |
1877 | 1881 |
1878 for (i = 0; i < 16; i++) { | 1882 for (i = 0; i < 16; i++) { |
1879 free_vlc(&s->dc_vlc[i]); | 1883 free_vlc(&s->dc_vlc[i]); |
1880 free_vlc(&s->ac_vlc_1[i]); | 1884 free_vlc(&s->ac_vlc_1[i]); |
1881 free_vlc(&s->ac_vlc_2[i]); | 1885 free_vlc(&s->ac_vlc_2[i]); |