libavcodec.hg: vp3.c comparison

comparison vp3.c @ 11133:cd2956d08cc1 libavcodec

Don't pre-calculate first_pixel. 3.6% faster on Elephants_Dream_HD-q7-aq7.ogg on my penryn.

author	conrad
date	Fri, 12 Feb 2010 22:01:38 +0000
parents	449c12b6c3a0
children	68e16ac22032

comparison

equal deleted inserted replaced

-:449c12b6c3a0
+:cd2956d08cc1
 } Coeff;
 //FIXME split things out into their own arrays
 typedef struct Vp3Fragment {
 Coeff *next_coeff;
-/* address of first pixel taking into account which plane the fragment
-* lives on as well as the plane stride */
-int first_pixel;
 /* this is the macroblock that the fragment belongs to */
 uint16_t macroblock;
 uint8_t coding_method;
 int8_t motion_x;
 int8_t motion_y;
 Vp3Fragment *all_fragments;
 uint8_t *coeff_counts;
 Coeff *coeffs;
 Coeff *next_coeff;
 int fragment_start[3];
+int data_offset[3];
 ScanTable scantable;
 /* tables */
 uint16_t coded_dc_scale_factor[64];
 /* this is a list of indexes into the all_fragments array indicating
 * which of the fragments are coded */
 int *coded_fragment_list;
 int coded_fragment_list_index;
-int pixel_addresses_initialized;
 /* track which fragments have already been decoded; called 'fast'
 * because this data structure avoids having to iterate through every
 * fragment in coded_fragment_list; once a fragment has been fully
 * decoded, it is removed from this list */
 int height          = s->fragment_height >> !!plane;
 int fragment        = s->fragment_start        [plane] + ystart * width;
 int stride          = s->current_frame.linesize[plane];
 uint8_t *plane_data = s->current_frame.data    [plane];
 if (!s->flipped_image) stride = -stride;
+plane_data += s->data_offset[plane] + 8*ystart*stride;
 for (y = ystart; y < yend; y++) {
 for (x = 0; x < width; x++) {
 /* This code basically just deblocks on the edges of coded blocks.
 if( s->all_fragments[fragment].coding_method != MODE_COPY )
 {
 /* do not perform left edge filter for left columns frags */
 if (x > 0) {
 s->dsp.vp3_h_loop_filter(
-plane_data + s->all_fragments[fragment].first_pixel,
+plane_data + 8*x,
 stride, bounding_values);
 }
 /* do not perform top edge filter for top row fragments */
 if (y > 0) {
 s->dsp.vp3_v_loop_filter(
-plane_data + s->all_fragments[fragment].first_pixel,
+plane_data + 8*x,
 stride, bounding_values);
 }
 /* do not perform right edge filter for right column
 * fragments or if right fragment neighbor is also coded
 * in this frame (it will be filtered in next iteration) */
 if ((x < width - 1) &&
 (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) {
 s->dsp.vp3_h_loop_filter(
-plane_data + s->all_fragments[fragment + 1].first_pixel,
+plane_data + 8*x + 8,
 stride, bounding_values);
 }
 /* do not perform bottom edge filter for bottom row
 * fragments or if bottom fragment neighbor is also coded
 * in this frame (it will be filtered in the next row) */
 if ((y < height - 1) &&
 (s->all_fragments[fragment + width].coding_method == MODE_COPY)) {
 s->dsp.vp3_v_loop_filter(
-plane_data + s->all_fragments[fragment + width].first_pixel,
+plane_data + 8*x + 8*stride,
 stride, bounding_values);
 }
 }
 fragment++;
 }
+plane_data += 8*stride;
 }
 }
 /**
 * called when all pixels up to row y are complete
 if (slice >= s->macroblock_height)
 return;
 for (plane = 0; plane < 3; plane++) {
-uint8_t *output_plane = s->current_frame.data    [plane];
+uint8_t *output_plane = s->current_frame.data    [plane] + s->data_offset[plane];
-uint8_t *  last_plane = s->   last_frame.data    [plane];
+uint8_t *  last_plane = s->   last_frame.data    [plane] + s->data_offset[plane];
-uint8_t *golden_plane = s-> golden_frame.data    [plane];
+uint8_t *golden_plane = s-> golden_frame.data    [plane] + s->data_offset[plane];
 int stride            = s->current_frame.linesize[plane];
 int plane_width       = s->width  >> !!plane;
 int plane_height      = s->height >> !!plane;
 int y =        slice *  FRAGMENT_PIXELS << !plane ;
 int slice_height = y + (FRAGMENT_PIXELS << !plane);
 /* for each fragment row in the slice (both of them)... */
 for (; y < slice_height; y += 8) {
 /* for each fragment in a row... */
 for (x = 0; x < plane_width; x += 8, i++) {
+int first_pixel = y*stride + x;
 if ((i < 0) || (i >= s->fragment_count)) {
 av_log(s->avctx, AV_LOG_ERROR, "  vp3:render_slice(): bad fragment number (%d)\n", i);
 return;
 }
 (s->all_fragments[i].coding_method == MODE_GOLDEN_MV))
 motion_source= golden_plane;
 else
 motion_source= last_plane;
-motion_source += s->all_fragments[i].first_pixel;
+motion_source += first_pixel;
 motion_halfpel_index = 0;
 /* sort out the motion vector if this fragment is coded
 * using a motion vector method */
 if ((s->all_fragments[i].coding_method > MODE_INTRA) &&
 put_no_rnd_pixels_l2 which would look more like the
 VP3 source but this would be slower as
 put_no_rnd_pixels_tab is better optimized */
 if(motion_halfpel_index != 3){
 s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index](
-output_plane + s->all_fragments[i].first_pixel,
+output_plane + first_pixel,
 motion_source, stride, 8);
 }else{
 int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1
 s->dsp.put_no_rnd_pixels_l2[1](
-output_plane + s->all_fragments[i].first_pixel,
+output_plane + first_pixel,
 motion_source - d,
 motion_source + stride + 1 + d,
 stride, 8);
 }
 dequantizer = s->qmat[s->all_fragments[i].qpi][1][plane];
 if (s->all_fragments[i].coding_method == MODE_INTRA) {
 if(s->avctx->idct_algo!=FF_IDCT_VP3)
 block[0] += 128<<3;
 s->dsp.idct_put(
-output_plane + s->all_fragments[i].first_pixel,
+output_plane + first_pixel,
 stride,
 block);
 } else {
 s->dsp.idct_add(
-output_plane + s->all_fragments[i].first_pixel,
+output_plane + first_pixel,
 stride,
 block);
 }
 } else {
 /* copy directly from the previous frame */
 s->dsp.put_pixels_tab[1][0](
-output_plane + s->all_fragments[i].first_pixel,
+output_plane + first_pixel,
-last_plane + s->all_fragments[i].first_pixel,
+last_plane + first_pixel,
 stride, 8);
 }
 }
 // Filter the previous block row. We can't filter the current row yet
 */
 // now that we've filtered the last rows, they're safe to display
 if (slice)
 vp3_draw_horiz_band(s, 16*slice);
-}
-/*
-* This function computes the first pixel addresses for each fragment.
-* This function needs to be invoked after the first frame is allocated
-* so that it has access to the plane strides.
-*/
-static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s)
-{
-#define Y_INITIAL(chroma_shift)  s->flipped_image ? 1  : s->fragment_height >> chroma_shift
-#define Y_FINISHED(chroma_shift) s->flipped_image ? y <= s->fragment_height >> chroma_shift : y > 0
-int i, x, y;
-const int y_inc = s->flipped_image ? 1 : -1;
-/* figure out the first pixel addresses for each of the fragments */
-/* Y plane */
-i = 0;
-for (y = Y_INITIAL(0); Y_FINISHED(0); y += y_inc) {
-for (x = 0; x < s->fragment_width; x++) {
-s->all_fragments[i++].first_pixel =
-s->golden_frame.linesize[0] * y * FRAGMENT_PIXELS -
-s->golden_frame.linesize[0] +
-x * FRAGMENT_PIXELS;
-}
-}
-/* U plane */
-i = s->fragment_start[1];
-for (y = Y_INITIAL(1); Y_FINISHED(1); y += y_inc) {
-for (x = 0; x < s->fragment_width / 2; x++) {
-s->all_fragments[i++].first_pixel =
-s->golden_frame.linesize[1] * y * FRAGMENT_PIXELS -
-s->golden_frame.linesize[1] +
-x * FRAGMENT_PIXELS;
-}
-}
-/* V plane */
-i = s->fragment_start[2];
-for (y = Y_INITIAL(1); Y_FINISHED(1); y += y_inc) {
-for (x = 0; x < s->fragment_width / 2; x++) {
-s->all_fragments[i++].first_pixel =
-s->golden_frame.linesize[2] * y * FRAGMENT_PIXELS -
-s->golden_frame.linesize[2] +
-x * FRAGMENT_PIXELS;
-}
-}
 }
 /*
 * This is the ffmpeg/libavcodec API init function.
 */
 s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment));
 s->coeff_counts = av_malloc(s->fragment_count * sizeof(*s->coeff_counts));
 s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65);
 s->coded_fragment_list = av_malloc(s->fragment_count * sizeof(int));
 s->fast_fragment_list = av_malloc(s->fragment_count * sizeof(int));
-s->pixel_addresses_initialized = 0;
 if (!s->superblock_coding || !s->all_fragments || !s->coeff_counts ||
 !s->coeffs || !s->coded_fragment_list || !s->fast_fragment_list) {
 vp3_decode_end(avctx);
 return -1;
 }
 return -1;
 }
 /* golden frame is also the current frame */
 s->current_frame= s->golden_frame;
-/* time to figure out pixel addresses? */
-if (!s->pixel_addresses_initialized)
-{
-vp3_calculate_pixel_addresses(s);
-s->pixel_addresses_initialized = 1;
-}
 } else {
 /* allocate a new current frame */
 s->current_frame.reference = 3;
-if (!s->pixel_addresses_initialized) {
+if (!s->golden_frame.data[0]) {
 av_log(s->avctx, AV_LOG_ERROR, "vp3: first frame not a keyframe\n");
 return -1;
 }
 if(avctx->get_buffer(avctx, &s->current_frame) < 0) {
 av_log(s->avctx, AV_LOG_ERROR, "vp3: get_buffer() failed\n");
 return -1;
 }
 if (unpack_dct_coeffs(s, &gb)){
 av_log(s->avctx, AV_LOG_ERROR, "error in unpack_dct_coeffs\n");
 return -1;
+}
+for (i = 0; i < 3; i++) {
+if (s->flipped_image)
+s->data_offset[i] = 0;
+else
+s->data_offset[i] = ((s->height>>!!i)-1) * s->current_frame.linesize[i];
 }
 s->last_slice_end = 0;
 for (i = 0; i < s->macroblock_height; i++)
 render_slice(s, i);

Mercurial > libavcodec.hg

comparison vp3.c @ 11133:cd2956d08cc1 libavcodec