Mercurial > libavcodec.hg
comparison vp8.c @ 12248:121272849def libavcodec
VP8: always_inline some things to force gcc to do the right thing
Mostly seems to help in the MC code, which gets a hundred cycles faster.
author | darkshikari |
---|---|
date | Fri, 23 Jul 2010 21:36:21 +0000 |
parents | 50a96623366b |
children | 35ee666e4496 |
comparison
legend: equal | deleted | inserted | replaced
12247:50a96623366b | 12248:121272849def |
---|---|
519 } | 519 } |
520 | 520 |
521 return 0; | 521 return 0; |
522 } | 522 } |
523 | 523 |
524 static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, | 524 static av_always_inline |
525 int mb_x, int mb_y) | 525 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y) |
526 { | 526 { |
527 #define MARGIN (16 << 2) | 527 #define MARGIN (16 << 2) |
528 dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN), | 528 dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN), |
529 ((s->mb_width - 1 - mb_x) << 6) + MARGIN); | 529 ((s->mb_width - 1 - mb_x) << 6) + MARGIN); |
530 dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN), | 530 dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN), |
531 ((s->mb_height - 1 - mb_y) << 6) + MARGIN); | 531 ((s->mb_height - 1 - mb_y) << 6) + MARGIN); |
532 } | 532 } |
533 | 533 |
534 static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, | 534 static av_always_inline |
535 VP56mv near[2], VP56mv *best, uint8_t cnt[4]) | 535 void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, |
536 VP56mv near[2], VP56mv *best, uint8_t cnt[4]) | |
536 { | 537 { |
537 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, | 538 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, |
538 mb - 1 /* left */, | 539 mb - 1 /* left */, |
539 mb + 1 /* top-left */ }; | 540 mb + 1 /* top-left */ }; |
540 enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; | 541 enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; |
612 x = vp8_rac_get_tree(c, vp8_small_mvtree, &p[2]); | 613 x = vp8_rac_get_tree(c, vp8_small_mvtree, &p[2]); |
613 | 614 |
614 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; | 615 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; |
615 } | 616 } |
616 | 617 |
617 static const uint8_t *get_submv_prob(uint32_t left, uint32_t top) | 618 static av_always_inline |
619 const uint8_t *get_submv_prob(uint32_t left, uint32_t top) | |
618 { | 620 { |
619 if (left == top) | 621 if (left == top) |
620 return vp8_submv_prob[4-!!left]; | 622 return vp8_submv_prob[4-!!left]; |
621 if (!top) | 623 if (!top) |
622 return vp8_submv_prob[2]; | 624 return vp8_submv_prob[2]; |
625 | 627 |
626 /** | 628 /** |
627 * Split motion vector prediction, 16.4. | 629 * Split motion vector prediction, 16.4. |
628 * @returns the number of motion vectors parsed (2, 4 or 16) | 630 * @returns the number of motion vectors parsed (2, 4 or 16) |
629 */ | 631 */ |
630 static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) | 632 static av_always_inline |
633 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) | |
631 { | 634 { |
632 int part_idx = mb->partitioning = | 635 int part_idx = mb->partitioning = |
633 vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); | 636 vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); |
634 int n, num = vp8_mbsplit_count[part_idx]; | 637 int n, num = vp8_mbsplit_count[part_idx]; |
635 VP8Macroblock *top_mb = &mb[2]; | 638 VP8Macroblock *top_mb = &mb[2]; |
676 } | 679 } |
677 | 680 |
678 return num; | 681 return num; |
679 } | 682 } |
680 | 683 |
681 static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, | 684 static av_always_inline |
682 int stride, int keyframe) | 685 void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, |
686 int stride, int keyframe) | |
683 { | 687 { |
684 int x, y, t, l, i; | 688 int x, y, t, l, i; |
685 | 689 |
686 if (keyframe) { | 690 if (keyframe) { |
687 const uint8_t *ctx; | 691 const uint8_t *ctx; |
698 for (i = 0; i < 16; i++) | 702 for (i = 0; i < 16; i++) |
699 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); | 703 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); |
700 } | 704 } |
701 } | 705 } |
702 | 706 |
703 static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, | 707 static av_always_inline |
704 uint8_t *intra4x4, uint8_t *segment) | 708 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, |
709 uint8_t *intra4x4, uint8_t *segment) | |
705 { | 710 { |
706 VP56RangeCoder *c = &s->c; | 711 VP56RangeCoder *c = &s->c; |
707 | 712 |
708 if (s->segmentation.update_map) | 713 if (s->segmentation.update_map) |
709 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); | 714 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); |
825 offset = 0; | 830 offset = 0; |
826 } | 831 } |
827 return nonzero; | 832 return nonzero; |
828 } | 833 } |
829 | 834 |
830 static void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, | 835 static av_always_inline |
831 uint8_t t_nnz[9], uint8_t l_nnz[9]) | 836 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, |
837 uint8_t t_nnz[9], uint8_t l_nnz[9]) | |
832 { | 838 { |
833 LOCAL_ALIGNED_16(DCTELEM, dc,[16]); | 839 LOCAL_ALIGNED_16(DCTELEM, dc,[16]); |
834 int i, x, y, luma_start = 0, luma_ctx = 3; | 840 int i, x, y, luma_start = 0, luma_ctx = 3; |
835 int nnz_pred, nnz, nnz_total = 0; | 841 int nnz_pred, nnz, nnz_total = 0; |
836 int segment = s->segment; | 842 int segment = s->segment; |
923 XCHG(top_border+16, src_cb, 1); | 929 XCHG(top_border+16, src_cb, 1); |
924 XCHG(top_border+24, src_cr, 1); | 930 XCHG(top_border+24, src_cr, 1); |
925 } | 931 } |
926 } | 932 } |
927 | 933 |
928 static int check_intra_pred_mode(int mode, int mb_x, int mb_y) | 934 static av_always_inline |
935 int check_intra_pred_mode(int mode, int mb_x, int mb_y) | |
929 { | 936 { |
930 if (mode == DC_PRED8x8) { | 937 if (mode == DC_PRED8x8) { |
931 if (!mb_x) { | 938 if (!mb_x) { |
932 mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; | 939 mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; |
933 } else if (!mb_y) { | 940 } else if (!mb_y) { |
935 } | 942 } |
936 } | 943 } |
937 return mode; | 944 return mode; |
938 } | 945 } |
939 | 946 |
940 static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, | 947 static av_always_inline |
941 uint8_t *intra4x4, int mb_x, int mb_y) | 948 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, |
949 uint8_t *intra4x4, int mb_x, int mb_y) | |
942 { | 950 { |
943 int x, y, mode, nnz, tr; | 951 int x, y, mode, nnz, tr; |
944 | 952 |
945 // for the first row, we need to run xchg_mb_border to init the top edge to 127 | 953 // for the first row, we need to run xchg_mb_border to init the top edge to 127 |
946 // otherwise, skip it if we aren't going to deblock | 954 // otherwise, skip it if we aren't going to deblock |
1018 * @param width width of src/dst plane data | 1026 * @param width width of src/dst plane data |
1019 * @param height height of src/dst plane data | 1027 * @param height height of src/dst plane data |
1020 * @param linesize size of a single line of plane data, including padding | 1028 * @param linesize size of a single line of plane data, including padding |
1021 * @param mc_func motion compensation function pointers (bilinear or sixtap MC) | 1029 * @param mc_func motion compensation function pointers (bilinear or sixtap MC) |
1022 */ | 1030 */ |
1023 static inline void vp8_mc(VP8Context *s, int luma, | 1031 static av_always_inline |
1024 uint8_t *dst, uint8_t *src, const VP56mv *mv, | 1032 void vp8_mc(VP8Context *s, int luma, |
1025 int x_off, int y_off, int block_w, int block_h, | 1033 uint8_t *dst, uint8_t *src, const VP56mv *mv, |
1026 int width, int height, int linesize, | 1034 int x_off, int y_off, int block_w, int block_h, |
1027 vp8_mc_func mc_func[3][3]) | 1035 int width, int height, int linesize, |
1036 vp8_mc_func mc_func[3][3]) | |
1028 { | 1037 { |
1029 if (AV_RN32A(mv)) { | 1038 if (AV_RN32A(mv)) { |
1030 static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; | 1039 static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; |
1031 int mx = (mv->x << luma)&7, mx_idx = idx[mx]; | 1040 int mx = (mv->x << luma)&7, mx_idx = idx[mx]; |
1032 int my = (mv->y << luma)&7, my_idx = idx[my]; | 1041 int my = (mv->y << luma)&7, my_idx = idx[my]; |
1046 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); | 1055 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); |
1047 } else | 1056 } else |
1048 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); | 1057 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); |
1049 } | 1058 } |
1050 | 1059 |
1051 static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3], | 1060 static av_always_inline |
1052 AVFrame *ref_frame, int x_off, int y_off, | 1061 void vp8_mc_part(VP8Context *s, uint8_t *dst[3], |
1053 int bx_off, int by_off, | 1062 AVFrame *ref_frame, int x_off, int y_off, |
1054 int block_w, int block_h, | 1063 int bx_off, int by_off, |
1055 int width, int height, VP56mv *mv) | 1064 int block_w, int block_h, |
1065 int width, int height, VP56mv *mv) | |
1056 { | 1066 { |
1057 VP56mv uvmv = *mv; | 1067 VP56mv uvmv = *mv; |
1058 | 1068 |
1059 /* Y */ | 1069 /* Y */ |
1060 vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off, | 1070 vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off, |
1081 s->put_pixels_tab[1 + (block_w == 4)]); | 1091 s->put_pixels_tab[1 + (block_w == 4)]); |
1082 } | 1092 } |
1083 | 1093 |
1084 /* Fetch pixels for estimated mv 4 macroblocks ahead. | 1094 /* Fetch pixels for estimated mv 4 macroblocks ahead. |
1085 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ | 1095 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ |
1086 static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) | 1096 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) |
1087 { | 1097 { |
1088 /* Don't prefetch refs that haven't been used very often this frame. */ | 1098 /* Don't prefetch refs that haven't been used very often this frame. */ |
1089 if (s->ref_count[ref-1] > (mb_xy >> 5)) { | 1099 if (s->ref_count[ref-1] > (mb_xy >> 5)) { |
1090 int x_off = mb_x << 4, y_off = mb_y << 4; | 1100 int x_off = mb_x << 4, y_off = mb_y << 4; |
1091 int mx = mb->mv.x + x_off + 8; | 1101 int mx = mb->mv.x + x_off + 8; |
1099 } | 1109 } |
1100 | 1110 |
1101 /** | 1111 /** |
1102 * Apply motion vectors to prediction buffer, chapter 18. | 1112 * Apply motion vectors to prediction buffer, chapter 18. |
1103 */ | 1113 */ |
1104 static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, | 1114 static av_always_inline |
1105 int mb_x, int mb_y) | 1115 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, |
1116 int mb_x, int mb_y) | |
1106 { | 1117 { |
1107 int x_off = mb_x << 4, y_off = mb_y << 4; | 1118 int x_off = mb_x << 4, y_off = mb_y << 4; |
1108 int width = 16*s->mb_width, height = 16*s->mb_height; | 1119 int width = 16*s->mb_width, height = 16*s->mb_height; |
1109 AVFrame *ref = s->framep[mb->ref_frame]; | 1120 AVFrame *ref = s->framep[mb->ref_frame]; |
1110 VP56mv *bmv = mb->bmv; | 1121 VP56mv *bmv = mb->bmv; |
1183 8, 8, 8, 8, width, height, &bmv[3]); | 1194 8, 8, 8, 8, width, height, &bmv[3]); |
1184 break; | 1195 break; |
1185 } | 1196 } |
1186 } | 1197 } |
1187 | 1198 |
1188 static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb) | 1199 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb) |
1189 { | 1200 { |
1190 int x, y, ch; | 1201 int x, y, ch; |
1191 | 1202 |
1192 if (mb->mode != MODE_I4x4) { | 1203 if (mb->mode != MODE_I4x4) { |
1193 uint8_t *y_dst = dst[0]; | 1204 uint8_t *y_dst = dst[0]; |
1234 } | 1245 } |
1235 } | 1246 } |
1236 } | 1247 } |
1237 } | 1248 } |
1238 | 1249 |
1239 static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) | 1250 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) |
1240 { | 1251 { |
1241 int interior_limit, filter_level; | 1252 int interior_limit, filter_level; |
1242 | 1253 |
1243 if (s->segmentation.enabled) { | 1254 if (s->segmentation.enabled) { |
1244 filter_level = s->segmentation.filter_level[s->segment]; | 1255 filter_level = s->segmentation.filter_level[s->segment]; |
1274 f->filter_level = filter_level; | 1285 f->filter_level = filter_level; |
1275 f->inner_limit = interior_limit; | 1286 f->inner_limit = interior_limit; |
1276 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT; | 1287 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT; |
1277 } | 1288 } |
1278 | 1289 |
1279 static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y) | 1290 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y) |
1280 { | 1291 { |
1281 int mbedge_lim, bedge_lim, hev_thresh; | 1292 int mbedge_lim, bedge_lim, hev_thresh; |
1282 int filter_level = f->filter_level; | 1293 int filter_level = f->filter_level; |
1283 int inner_limit = f->inner_limit; | 1294 int inner_limit = f->inner_limit; |
1284 int inner_filter = f->inner_filter; | 1295 int inner_filter = f->inner_filter; |
1343 uvlinesize, bedge_lim, | 1354 uvlinesize, bedge_lim, |
1344 inner_limit, hev_thresh); | 1355 inner_limit, hev_thresh); |
1345 } | 1356 } |
1346 } | 1357 } |
1347 | 1358 |
1348 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y) | 1359 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y) |
1349 { | 1360 { |
1350 int mbedge_lim, bedge_lim; | 1361 int mbedge_lim, bedge_lim; |
1351 int filter_level = f->filter_level; | 1362 int filter_level = f->filter_level; |
1352 int inner_limit = f->inner_limit; | 1363 int inner_limit = f->inner_limit; |
1353 int inner_filter = f->inner_filter; | 1364 int inner_filter = f->inner_filter; |