libavcodec.hg: vp3.c comparison

comparison vp3.c @ 11477:445659683743 libavcodec

vp3: Split out motion vectors to their own array. 1.5% faster overall decode on my penryn.

author	conrad
date	Sat, 13 Mar 2010 10:25:41 +0000
parents	b7744b7bee4b
children	8a4984c5cacc

comparison

equal deleted inserted replaced

-:8ef285dc7f80
+:445659683743
 //FIXME split things out into their own arrays
 typedef struct Vp3Fragment {
 int16_t dc;
 uint8_t coding_method;
-int8_t motion_x;
-int8_t motion_y;
 uint8_t qpi;
 } Vp3Fragment;
 #define SB_NOT_CODED        0
 #define SB_PARTIALLY_CODED  1
 int fragment_height[2];
 Vp3Fragment *all_fragments;
 int fragment_start[3];
 int data_offset[3];
+int8_t (*motion_val[2])[2];
 ScanTable scantable;
 /* tables */
 uint16_t coded_dc_scale_factor[64];
 int last_motion_y = 0;
 int prior_last_motion_x = 0;
 int prior_last_motion_y = 0;
 int current_macroblock;
 int current_fragment;
-Vp3Fragment *frag;
+int frag;
 if (s->keyframe)
 return 0;
 /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */
 /* assign the motion vectors to the correct fragments */
 for (k = 0; k < 4; k++) {
 current_fragment =
 BLOCK_Y*s->fragment_width[0] + BLOCK_X;
 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
-s->all_fragments[current_fragment].motion_x = motion_x[k];
+s->motion_val[0][current_fragment][0] = motion_x[k];
-s->all_fragments[current_fragment].motion_y = motion_y[k];
+s->motion_val[0][current_fragment][1] = motion_y[k];
 } else {
-s->all_fragments[current_fragment].motion_x = motion_x[0];
+s->motion_val[0][current_fragment][0] = motion_x[0];
-s->all_fragments[current_fragment].motion_y = motion_y[0];
+s->motion_val[0][current_fragment][1] = motion_y[0];
 }
 }
-#define SET_CHROMA_MV(mx, my) \
-frag[s->fragment_start[1]].motion_x = mx; \
-frag[s->fragment_start[1]].motion_y = my; \
-frag[s->fragment_start[2]].motion_x = mx; \
-frag[s->fragment_start[2]].motion_y = my
 if (s->chroma_y_shift) {
 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
 motion_x[0] = RSHIFT(motion_x[0] + motion_x[1] + motion_x[2] + motion_x[3], 2);
 motion_y[0] = RSHIFT(motion_y[0] + motion_y[1] + motion_y[2] + motion_y[3], 2);
 }
 motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1);
 motion_y[0] = (motion_y[0]>>1) | (motion_y[0]&1);
-frag = s->all_fragments + mb_y*s->fragment_width[1] + mb_x;
+frag = mb_y*s->fragment_width[1] + mb_x;
-SET_CHROMA_MV(motion_x[0], motion_y[0]);
+s->motion_val[1][frag][0] = motion_x[0];
+s->motion_val[1][frag][1] = motion_y[0];
 } else if (s->chroma_x_shift) {
 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
 motion_x[0] = RSHIFT(motion_x[0] + motion_x[1], 1);
 motion_y[0] = RSHIFT(motion_y[0] + motion_y[1], 1);
 motion_x[1] = RSHIFT(motion_x[2] + motion_x[3], 1);
 motion_y[1] = motion_y[0];
 }
 motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1);
 motion_x[1] = (motion_x[1]>>1) | (motion_x[1]&1);
-frag = s->all_fragments + 2*mb_y*s->fragment_width[1] + mb_x;
+frag = 2*mb_y*s->fragment_width[1] + mb_x;
 for (k = 0; k < 2; k++) {
-SET_CHROMA_MV(motion_x[k], motion_y[k]);
+s->motion_val[1][frag][0] = motion_x[k];
+s->motion_val[1][frag][1] = motion_y[k];
 frag += s->fragment_width[1];
 }
 } else {
 for (k = 0; k < 4; k++) {
-frag = s->all_fragments + BLOCK_Y*s->fragment_width[1] + BLOCK_X;
+frag = BLOCK_Y*s->fragment_width[1] + BLOCK_X;
 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
-SET_CHROMA_MV(motion_x[k], motion_y[k]);
+s->motion_val[1][frag][0] = motion_x[k];
+s->motion_val[1][frag][1] = motion_y[k];
 } else {
-SET_CHROMA_MV(motion_x[0], motion_y[0]);
+s->motion_val[1][frag][0] = motion_x[0];
+s->motion_val[1][frag][1] = motion_y[0];
 }
 }
 }
 }
 }
 uint8_t *  last_plane = s->   last_frame.data    [plane] + s->data_offset[plane];
 uint8_t *golden_plane = s-> golden_frame.data    [plane] + s->data_offset[plane];
 int stride            = s->current_frame.linesize[plane];
 int plane_width       = s->width  >> (plane && s->chroma_x_shift);
 int plane_height      = s->height >> (plane && s->chroma_y_shift);
+int8_t (*motion_val)[2] = s->motion_val[!!plane];
 int sb_x, sb_y        = slice << (!plane && s->chroma_y_shift);
 int slice_height      = sb_y + 1 + (!plane && s->chroma_y_shift);
 int slice_width       = plane ? s->c_superblock_width : s->y_superblock_width;
 /* sort out the motion vector if this fragment is coded
 * using a motion vector method */
 if ((s->all_fragments[i].coding_method > MODE_INTRA) &&
 (s->all_fragments[i].coding_method != MODE_USING_GOLDEN)) {
 int src_x, src_y;
-motion_x = s->all_fragments[i].motion_x;
+motion_x = motion_val[y*fragment_width + x][0];
-motion_y = s->all_fragments[i].motion_y;
+motion_y = motion_val[y*fragment_width + x][1];
 src_x= (motion_x>>1) + 8*x;
 src_y= (motion_y>>1) + 8*y;
 motion_halfpel_index = motion_x & 0x01;
 s->fragment_start[2] = y_fragment_count + c_fragment_count;
 s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment));
 s->coded_fragment_list[0] = av_malloc(s->fragment_count * sizeof(int));
 s->dct_tokens_base = av_malloc(64*s->fragment_count * sizeof(*s->dct_tokens_base));
+s->motion_val[0] = av_malloc(y_fragment_count * sizeof(*s->motion_val[0]));
+s->motion_val[1] = av_malloc(c_fragment_count * sizeof(*s->motion_val[1]));
 if (!s->superblock_coding || !s->all_fragments || !s->dct_tokens_base ||
-!s->coded_fragment_list[0]) {
+!s->coded_fragment_list[0] || !s->motion_val[0] || !s->motion_val[1]) {
 vp3_decode_end(avctx);
 return -1;
 }
 if (!s->theora_tables)
 av_free(s->all_fragments);
 av_free(s->coded_fragment_list[0]);
 av_free(s->dct_tokens_base);
 av_free(s->superblock_fragments);
 av_free(s->macroblock_coding);
+av_free(s->motion_val[0]);
+av_free(s->motion_val[1]);
 for (i = 0; i < 16; i++) {
 free_vlc(&s->dc_vlc[i]);
 free_vlc(&s->ac_vlc_1[i]);
 free_vlc(&s->ac_vlc_2[i]);

Mercurial > libavcodec.hg

comparison vp3.c @ 11477:445659683743 libavcodec