comparison vp8.c @ 12248:121272849def libavcodec

VP8: always_inline some things to force gcc to do the right thing. Mostly seems to help in the MC code, which gets a hundred cycles faster.
author darkshikari
date Fri, 23 Jul 2010 21:36:21 +0000
parents 50a96623366b
children 35ee666e4496
comparison
equal deleted inserted replaced
12247:50a96623366b 12248:121272849def
519 } 519 }
520 520
521 return 0; 521 return 0;
522 } 522 }
523 523
524 static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, 524 static av_always_inline
525 int mb_x, int mb_y) 525 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y)
526 { 526 {
527 #define MARGIN (16 << 2) 527 #define MARGIN (16 << 2)
528 dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN), 528 dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN),
529 ((s->mb_width - 1 - mb_x) << 6) + MARGIN); 529 ((s->mb_width - 1 - mb_x) << 6) + MARGIN);
530 dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN), 530 dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN),
531 ((s->mb_height - 1 - mb_y) << 6) + MARGIN); 531 ((s->mb_height - 1 - mb_y) << 6) + MARGIN);
532 } 532 }
533 533
534 static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, 534 static av_always_inline
535 VP56mv near[2], VP56mv *best, uint8_t cnt[4]) 535 void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
536 VP56mv near[2], VP56mv *best, uint8_t cnt[4])
536 { 537 {
537 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, 538 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
538 mb - 1 /* left */, 539 mb - 1 /* left */,
539 mb + 1 /* top-left */ }; 540 mb + 1 /* top-left */ };
540 enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; 541 enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };
612 x = vp8_rac_get_tree(c, vp8_small_mvtree, &p[2]); 613 x = vp8_rac_get_tree(c, vp8_small_mvtree, &p[2]);
613 614
614 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; 615 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
615 } 616 }
616 617
617 static const uint8_t *get_submv_prob(uint32_t left, uint32_t top) 618 static av_always_inline
619 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
618 { 620 {
619 if (left == top) 621 if (left == top)
620 return vp8_submv_prob[4-!!left]; 622 return vp8_submv_prob[4-!!left];
621 if (!top) 623 if (!top)
622 return vp8_submv_prob[2]; 624 return vp8_submv_prob[2];
625 627
626 /** 628 /**
627 * Split motion vector prediction, 16.4. 629 * Split motion vector prediction, 16.4.
628 * @returns the number of motion vectors parsed (2, 4 or 16) 630 * @returns the number of motion vectors parsed (2, 4 or 16)
629 */ 631 */
630 static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) 632 static av_always_inline
633 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
631 { 634 {
632 int part_idx = mb->partitioning = 635 int part_idx = mb->partitioning =
633 vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); 636 vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob);
634 int n, num = vp8_mbsplit_count[part_idx]; 637 int n, num = vp8_mbsplit_count[part_idx];
635 VP8Macroblock *top_mb = &mb[2]; 638 VP8Macroblock *top_mb = &mb[2];
676 } 679 }
677 680
678 return num; 681 return num;
679 } 682 }
680 683
681 static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, 684 static av_always_inline
682 int stride, int keyframe) 685 void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4,
686 int stride, int keyframe)
683 { 687 {
684 int x, y, t, l, i; 688 int x, y, t, l, i;
685 689
686 if (keyframe) { 690 if (keyframe) {
687 const uint8_t *ctx; 691 const uint8_t *ctx;
698 for (i = 0; i < 16; i++) 702 for (i = 0; i < 16; i++)
699 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); 703 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
700 } 704 }
701 } 705 }
702 706
703 static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, 707 static av_always_inline
704 uint8_t *intra4x4, uint8_t *segment) 708 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
709 uint8_t *intra4x4, uint8_t *segment)
705 { 710 {
706 VP56RangeCoder *c = &s->c; 711 VP56RangeCoder *c = &s->c;
707 712
708 if (s->segmentation.update_map) 713 if (s->segmentation.update_map)
709 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); 714 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
825 offset = 0; 830 offset = 0;
826 } 831 }
827 return nonzero; 832 return nonzero;
828 } 833 }
829 834
830 static void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, 835 static av_always_inline
831 uint8_t t_nnz[9], uint8_t l_nnz[9]) 836 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
837 uint8_t t_nnz[9], uint8_t l_nnz[9])
832 { 838 {
833 LOCAL_ALIGNED_16(DCTELEM, dc,[16]); 839 LOCAL_ALIGNED_16(DCTELEM, dc,[16]);
834 int i, x, y, luma_start = 0, luma_ctx = 3; 840 int i, x, y, luma_start = 0, luma_ctx = 3;
835 int nnz_pred, nnz, nnz_total = 0; 841 int nnz_pred, nnz, nnz_total = 0;
836 int segment = s->segment; 842 int segment = s->segment;
923 XCHG(top_border+16, src_cb, 1); 929 XCHG(top_border+16, src_cb, 1);
924 XCHG(top_border+24, src_cr, 1); 930 XCHG(top_border+24, src_cr, 1);
925 } 931 }
926 } 932 }
927 933
928 static int check_intra_pred_mode(int mode, int mb_x, int mb_y) 934 static av_always_inline
935 int check_intra_pred_mode(int mode, int mb_x, int mb_y)
929 { 936 {
930 if (mode == DC_PRED8x8) { 937 if (mode == DC_PRED8x8) {
931 if (!mb_x) { 938 if (!mb_x) {
932 mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; 939 mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
933 } else if (!mb_y) { 940 } else if (!mb_y) {
935 } 942 }
936 } 943 }
937 return mode; 944 return mode;
938 } 945 }
939 946
940 static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 947 static av_always_inline
941 uint8_t *intra4x4, int mb_x, int mb_y) 948 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
949 uint8_t *intra4x4, int mb_x, int mb_y)
942 { 950 {
943 int x, y, mode, nnz, tr; 951 int x, y, mode, nnz, tr;
944 952
945 // for the first row, we need to run xchg_mb_border to init the top edge to 127 953 // for the first row, we need to run xchg_mb_border to init the top edge to 127
946 // otherwise, skip it if we aren't going to deblock 954 // otherwise, skip it if we aren't going to deblock
1018 * @param width width of src/dst plane data 1026 * @param width width of src/dst plane data
1019 * @param height height of src/dst plane data 1027 * @param height height of src/dst plane data
1020 * @param linesize size of a single line of plane data, including padding 1028 * @param linesize size of a single line of plane data, including padding
1021 * @param mc_func motion compensation function pointers (bilinear or sixtap MC) 1029 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1022 */ 1030 */
1023 static inline void vp8_mc(VP8Context *s, int luma, 1031 static av_always_inline
1024 uint8_t *dst, uint8_t *src, const VP56mv *mv, 1032 void vp8_mc(VP8Context *s, int luma,
1025 int x_off, int y_off, int block_w, int block_h, 1033 uint8_t *dst, uint8_t *src, const VP56mv *mv,
1026 int width, int height, int linesize, 1034 int x_off, int y_off, int block_w, int block_h,
1027 vp8_mc_func mc_func[3][3]) 1035 int width, int height, int linesize,
1036 vp8_mc_func mc_func[3][3])
1028 { 1037 {
1029 if (AV_RN32A(mv)) { 1038 if (AV_RN32A(mv)) {
1030 static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; 1039 static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 };
1031 int mx = (mv->x << luma)&7, mx_idx = idx[mx]; 1040 int mx = (mv->x << luma)&7, mx_idx = idx[mx];
1032 int my = (mv->y << luma)&7, my_idx = idx[my]; 1041 int my = (mv->y << luma)&7, my_idx = idx[my];
1046 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); 1055 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1047 } else 1056 } else
1048 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); 1057 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1049 } 1058 }
1050 1059
1051 static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3], 1060 static av_always_inline
1052 AVFrame *ref_frame, int x_off, int y_off, 1061 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1053 int bx_off, int by_off, 1062 AVFrame *ref_frame, int x_off, int y_off,
1054 int block_w, int block_h, 1063 int bx_off, int by_off,
1055 int width, int height, VP56mv *mv) 1064 int block_w, int block_h,
1065 int width, int height, VP56mv *mv)
1056 { 1066 {
1057 VP56mv uvmv = *mv; 1067 VP56mv uvmv = *mv;
1058 1068
1059 /* Y */ 1069 /* Y */
1060 vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off, 1070 vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off,
1081 s->put_pixels_tab[1 + (block_w == 4)]); 1091 s->put_pixels_tab[1 + (block_w == 4)]);
1082 } 1092 }
1083 1093
1084 /* Fetch pixels for estimated mv 4 macroblocks ahead. 1094 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1085 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ 1095 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1086 static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) 1096 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1087 { 1097 {
1088 /* Don't prefetch refs that haven't been used very often this frame. */ 1098 /* Don't prefetch refs that haven't been used very often this frame. */
1089 if (s->ref_count[ref-1] > (mb_xy >> 5)) { 1099 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1090 int x_off = mb_x << 4, y_off = mb_y << 4; 1100 int x_off = mb_x << 4, y_off = mb_y << 4;
1091 int mx = mb->mv.x + x_off + 8; 1101 int mx = mb->mv.x + x_off + 8;
1099 } 1109 }
1100 1110
1101 /** 1111 /**
1102 * Apply motion vectors to prediction buffer, chapter 18. 1112 * Apply motion vectors to prediction buffer, chapter 18.
1103 */ 1113 */
1104 static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 1114 static av_always_inline
1105 int mb_x, int mb_y) 1115 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
1116 int mb_x, int mb_y)
1106 { 1117 {
1107 int x_off = mb_x << 4, y_off = mb_y << 4; 1118 int x_off = mb_x << 4, y_off = mb_y << 4;
1108 int width = 16*s->mb_width, height = 16*s->mb_height; 1119 int width = 16*s->mb_width, height = 16*s->mb_height;
1109 AVFrame *ref = s->framep[mb->ref_frame]; 1120 AVFrame *ref = s->framep[mb->ref_frame];
1110 VP56mv *bmv = mb->bmv; 1121 VP56mv *bmv = mb->bmv;
1183 8, 8, 8, 8, width, height, &bmv[3]); 1194 8, 8, 8, 8, width, height, &bmv[3]);
1184 break; 1195 break;
1185 } 1196 }
1186 } 1197 }
1187 1198
1188 static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb) 1199 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
1189 { 1200 {
1190 int x, y, ch; 1201 int x, y, ch;
1191 1202
1192 if (mb->mode != MODE_I4x4) { 1203 if (mb->mode != MODE_I4x4) {
1193 uint8_t *y_dst = dst[0]; 1204 uint8_t *y_dst = dst[0];
1234 } 1245 }
1235 } 1246 }
1236 } 1247 }
1237 } 1248 }
1238 1249
1239 static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) 1250 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1240 { 1251 {
1241 int interior_limit, filter_level; 1252 int interior_limit, filter_level;
1242 1253
1243 if (s->segmentation.enabled) { 1254 if (s->segmentation.enabled) {
1244 filter_level = s->segmentation.filter_level[s->segment]; 1255 filter_level = s->segmentation.filter_level[s->segment];
1274 f->filter_level = filter_level; 1285 f->filter_level = filter_level;
1275 f->inner_limit = interior_limit; 1286 f->inner_limit = interior_limit;
1276 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT; 1287 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1277 } 1288 }
1278 1289
1279 static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y) 1290 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1280 { 1291 {
1281 int mbedge_lim, bedge_lim, hev_thresh; 1292 int mbedge_lim, bedge_lim, hev_thresh;
1282 int filter_level = f->filter_level; 1293 int filter_level = f->filter_level;
1283 int inner_limit = f->inner_limit; 1294 int inner_limit = f->inner_limit;
1284 int inner_filter = f->inner_filter; 1295 int inner_filter = f->inner_filter;
1343 uvlinesize, bedge_lim, 1354 uvlinesize, bedge_lim,
1344 inner_limit, hev_thresh); 1355 inner_limit, hev_thresh);
1345 } 1356 }
1346 } 1357 }
1347 1358
1348 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y) 1359 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1349 { 1360 {
1350 int mbedge_lim, bedge_lim; 1361 int mbedge_lim, bedge_lim;
1351 int filter_level = f->filter_level; 1362 int filter_level = f->filter_level;
1352 int inner_limit = f->inner_limit; 1363 int inner_limit = f->inner_limit;
1353 int inner_filter = f->inner_filter; 1364 int inner_filter = f->inner_filter;