Mercurial > libavcodec.hg
changeset 12248:121272849def libavcodec
VP8: always_inline some things to force gcc to do the right thing
Mostly seems to help in the MC code, which gets a hundred cycles faster.
author | darkshikari |
---|---|
date | Fri, 23 Jul 2010 21:36:21 +0000 |
parents | 50a96623366b |
children | 35ee666e4496 |
files | vp8.c |
diffstat | 1 files changed, 43 insertions(+), 32 deletions(-) [+] |
line wrap: on
line diff
--- a/vp8.c Fri Jul 23 21:17:18 2010 +0000
+++ b/vp8.c Fri Jul 23 21:36:21 2010 +0000
@@ -521,8 +521,8 @@
     return 0;
 }
 
-static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src,
-                            int mb_x, int mb_y)
+static av_always_inline
+void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y)
 {
 #define MARGIN (16 << 2)
     dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN),
@@ -531,8 +531,9 @@
                      ((s->mb_height - 1 - mb_y) << 6) + MARGIN);
 }
 
-static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
-                          VP56mv near[2], VP56mv *best, uint8_t cnt[4])
+static av_always_inline
+void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+                   VP56mv near[2], VP56mv *best, uint8_t cnt[4])
 {
     VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
                                   mb - 1 /* left */,
@@ -614,7 +615,8 @@
     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
 }
 
-static const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
+static av_always_inline
+const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
 {
     if (left == top)
         return vp8_submv_prob[4-!!left];
@@ -627,7 +629,8 @@
  * Split motion vector prediction, 16.4.
  * @returns the number of motion vectors parsed (2, 4 or 16)
  */
-static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
+static av_always_inline
+int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
 {
     int part_idx = mb->partitioning =
         vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob);
@@ -678,8 +681,9 @@
     return num;
 }
 
-static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4,
-                                         int stride, int keyframe)
+static av_always_inline
+void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4,
+                           int stride, int keyframe)
 {
     int x, y, t, l, i;
 
@@ -700,8 +704,9 @@
     }
 }
 
-static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
-                           uint8_t *intra4x4, uint8_t *segment)
+static av_always_inline
+void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+                    uint8_t *intra4x4, uint8_t *segment)
 {
     VP56RangeCoder *c = &s->c;
 
@@ -827,8 +832,9 @@
     return nonzero;
 }
 
-static void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
-                             uint8_t t_nnz[9], uint8_t l_nnz[9])
+static av_always_inline
+void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
+                      uint8_t t_nnz[9], uint8_t l_nnz[9])
 {
     LOCAL_ALIGNED_16(DCTELEM, dc,[16]);
     int i, x, y, luma_start = 0, luma_ctx = 3;
@@ -925,7 +931,8 @@
     }
 }
 
-static int check_intra_pred_mode(int mode, int mb_x, int mb_y)
+static av_always_inline
+int check_intra_pred_mode(int mode, int mb_x, int mb_y)
 {
     if (mode == DC_PRED8x8) {
         if (!mb_x) {
@@ -937,8 +944,9 @@
     return mode;
 }
 
-static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
-                          uint8_t *intra4x4, int mb_x, int mb_y)
+static av_always_inline
+void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
+                   uint8_t *intra4x4, int mb_x, int mb_y)
 {
     int x, y, mode, nnz, tr;
 
@@ -1020,11 +1028,12 @@
  * @param linesize size of a single line of plane data, including padding
  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
  */
-static inline void vp8_mc(VP8Context *s, int luma,
-                          uint8_t *dst, uint8_t *src, const VP56mv *mv,
-                          int x_off, int y_off, int block_w, int block_h,
-                          int width, int height, int linesize,
-                          vp8_mc_func mc_func[3][3])
+static av_always_inline
+void vp8_mc(VP8Context *s, int luma,
+            uint8_t *dst, uint8_t *src, const VP56mv *mv,
+            int x_off, int y_off, int block_w, int block_h,
+            int width, int height, int linesize,
+            vp8_mc_func mc_func[3][3])
 {
     if (AV_RN32A(mv)) {
         static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 };
@@ -1048,11 +1057,12 @@
         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
 }
 
-static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
-                               AVFrame *ref_frame, int x_off, int y_off,
-                               int bx_off, int by_off,
-                               int block_w, int block_h,
-                               int width, int height, VP56mv *mv)
+static av_always_inline
+void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
+                 AVFrame *ref_frame, int x_off, int y_off,
+                 int bx_off, int by_off,
+                 int block_w, int block_h,
+                 int width, int height, VP56mv *mv)
 {
     VP56mv uvmv = *mv;
 
@@ -1083,7 +1093,7 @@
 
 /* Fetch pixels for estimated mv 4 macroblocks ahead.
  * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
-static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
+static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
 {
     /* Don't prefetch refs that haven't been used very often this frame. */
     if (s->ref_count[ref-1] > (mb_xy >> 5)) {
@@ -1101,8 +1111,9 @@
 /**
  * Apply motion vectors to prediction buffer, chapter 18.
  */
-static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
-                          int mb_x, int mb_y)
+static av_always_inline
+void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
+                   int mb_x, int mb_y)
 {
     int x_off = mb_x << 4, y_off = mb_y << 4;
     int width = 16*s->mb_width, height = 16*s->mb_height;
@@ -1185,7 +1196,7 @@
     }
 }
 
-static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
+static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
 {
     int x, y, ch;
 
@@ -1236,7 +1247,7 @@
     }
 }
 
-static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
+static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
 {
     int interior_limit, filter_level;
 
@@ -1276,7 +1287,7 @@
     f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
 }
 
-static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
+static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
 {
     int mbedge_lim, bedge_lim, hev_thresh;
     int filter_level = f->filter_level;
@@ -1345,7 +1356,7 @@
     }
 }
 
-static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
+static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
 {
     int mbedge_lim, bedge_lim;
     int filter_level = f->filter_level;