libavcodec.hg: vp8.c comparison

comparison vp8.c @ 12248:121272849def libavcodec

VP8: always_inline some things to force gcc to do the right thing. Mostly seems to help in the MC code, which gets a hundred cycles faster.

author	darkshikari
date	Fri, 23 Jul 2010 21:36:21 +0000
parents	50a96623366b
children	35ee666e4496

comparison

equal deleted inserted replaced

-:50a96623366b
+:121272849def
 }
 return 0;
 }
-static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src,
+static av_always_inline
-int mb_x, int mb_y)
+void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y)
 {
 #define MARGIN (16 << 2)
 dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN),
 ((s->mb_width  - 1 - mb_x) << 6) + MARGIN);
 dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN),
 ((s->mb_height - 1 - mb_y) << 6) + MARGIN);
 }
-static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+static av_always_inline
-VP56mv near[2], VP56mv *best, uint8_t cnt[4])
+void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+VP56mv near[2], VP56mv *best, uint8_t cnt[4])
 {
 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
 mb - 1 /* left */,
 mb + 1 /* top-left */ };
 enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };
 x = vp8_rac_get_tree(c, vp8_small_mvtree, &p[2]);
 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
 }
-static const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
+static av_always_inline
+const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
 {
 if (left == top)
 return vp8_submv_prob[4-!!left];
 if (!top)
 return vp8_submv_prob[2];
 /**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
-static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
+static av_always_inline
+int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
 {
 int part_idx = mb->partitioning =
 vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob);
 int n, num = vp8_mbsplit_count[part_idx];
 VP8Macroblock *top_mb  = &mb[2];
 }
 return num;
 }
-static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4,
+static av_always_inline
-int stride, int keyframe)
+void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4,
+int stride, int keyframe)
 {
 int x, y, t, l, i;
 if (keyframe) {
 const uint8_t *ctx;
 for (i = 0; i < 16; i++)
 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
 }
 }
-static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+static av_always_inline
-uint8_t *intra4x4, uint8_t *segment)
+void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+uint8_t *intra4x4, uint8_t *segment)
 {
 VP56RangeCoder *c = &s->c;
 if (s->segmentation.update_map)
 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
 offset = 0;
 }
 return nonzero;
 }
-static void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
+static av_always_inline
-uint8_t t_nnz[9], uint8_t l_nnz[9])
+void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
+uint8_t t_nnz[9], uint8_t l_nnz[9])
 {
 LOCAL_ALIGNED_16(DCTELEM, dc,[16]);
 int i, x, y, luma_start = 0, luma_ctx = 3;
 int nnz_pred, nnz, nnz_total = 0;
 int segment = s->segment;
 XCHG(top_border+16,    src_cb, 1);
 XCHG(top_border+24,    src_cr, 1);
 }
 }
-static int check_intra_pred_mode(int mode, int mb_x, int mb_y)
+static av_always_inline
+int check_intra_pred_mode(int mode, int mb_x, int mb_y)
 {
 if (mode == DC_PRED8x8) {
 if (!mb_x) {
 mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
 } else if (!mb_y) {
 }
 }
 return mode;
 }
-static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
+static av_always_inline
-uint8_t *intra4x4, int mb_x, int mb_y)
+void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
+uint8_t *intra4x4, int mb_x, int mb_y)
 {
 int x, y, mode, nnz, tr;
 // for the first row, we need to run xchg_mb_border to init the top edge to 127
 // otherwise, skip it if we aren't going to deblock
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
-static inline void vp8_mc(VP8Context *s, int luma,
+static av_always_inline
-uint8_t *dst, uint8_t *src, const VP56mv *mv,
+void vp8_mc(VP8Context *s, int luma,
-int x_off, int y_off, int block_w, int block_h,
+uint8_t *dst, uint8_t *src, const VP56mv *mv,
-int width, int height, int linesize,
+int x_off, int y_off, int block_w, int block_h,
-vp8_mc_func mc_func[3][3])
+int width, int height, int linesize,
+vp8_mc_func mc_func[3][3])
 {
 if (AV_RN32A(mv)) {
 static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 };
 int mx = (mv->x << luma)&7, mx_idx = idx[mx];
 int my = (mv->y << luma)&7, my_idx = idx[my];
 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
 } else
 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
 }
-static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
+static av_always_inline
-AVFrame *ref_frame, int x_off, int y_off,
+void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
-int bx_off, int by_off,
+AVFrame *ref_frame, int x_off, int y_off,
-int block_w, int block_h,
+int bx_off, int by_off,
-int width, int height, VP56mv *mv)
+int block_w, int block_h,
+int width, int height, VP56mv *mv)
 {
 VP56mv uvmv = *mv;
 /* Y */
 vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off,
 s->put_pixels_tab[1 + (block_w == 4)]);
 }
 /* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
-static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
+static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
 {
 /* Don't prefetch refs that haven't been used very often this frame. */
 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
 int x_off = mb_x << 4, y_off = mb_y << 4;
 int mx = mb->mv.x + x_off + 8;
 }
 /**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
-static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
+static av_always_inline
-int mb_x, int mb_y)
+void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
+int mb_x, int mb_y)
 {
 int x_off = mb_x << 4, y_off = mb_y << 4;
 int width = 16*s->mb_width, height = 16*s->mb_height;
 AVFrame *ref = s->framep[mb->ref_frame];
 VP56mv *bmv = mb->bmv;
 8, 8, 8, 8, width, height, &bmv[3]);
 break;
 }
 }
-static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
+static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
 {
 int x, y, ch;
 if (mb->mode != MODE_I4x4) {
 uint8_t *y_dst = dst[0];
 }
 }
 }
 }
-static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
+static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
 {
 int interior_limit, filter_level;
 if (s->segmentation.enabled) {
 filter_level = s->segmentation.filter_level[s->segment];
 f->filter_level = filter_level;
 f->inner_limit = interior_limit;
 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
 }
-static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
+static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
 {
 int mbedge_lim, bedge_lim, hev_thresh;
 int filter_level = f->filter_level;
 int inner_limit = f->inner_limit;
 int inner_filter = f->inner_filter;
 uvlinesize,  bedge_lim,
 inner_limit, hev_thresh);
 }
 }
-static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
+static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
 {
 int mbedge_lim, bedge_lim;
 int filter_level = f->filter_level;
 int inner_limit = f->inner_limit;
 int inner_filter = f->inner_filter;

Mercurial > libavcodec.hg

comparison vp8.c @ 12248:121272849def libavcodec