Mercurial > libavcodec.hg
comparison vp8.c @ 12215:58d828f9810f libavcodec
Add prefetching to VP8 decoder
~5% faster overall, probably depends on CPU and resolution.
author | darkshikari |
---|---|
date | Thu, 22 Jul 2010 03:09:10 +0000 |
parents | 3465e53116e5 |
children | f6b229456bdf |
comparison
equal
deleted
inserted
replaced
12214:657d353cd515 | 12215:58d828f9810f |
---|---|
1052 ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off, | 1052 ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off, |
1053 block_w, block_h, width, height, s->uvlinesize, | 1053 block_w, block_h, width, height, s->uvlinesize, |
1054 s->put_pixels_tab[1 + (block_w == 4)]); | 1054 s->put_pixels_tab[1 + (block_w == 4)]); |
1055 } | 1055 } |
1056 | 1056 |
1057 /* Fetch pixels for estimated mv 4 macroblocks ahead. | |
1058 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ | |
1059 static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int x_off, int y_off, int ref) | |
1060 { | |
1061 if (mb->ref_frame != VP56_FRAME_CURRENT) { | |
1062 int mx = mb->mv.x + x_off + 8; | |
1063 int my = mb->mv.y + y_off; | |
1064 uint8_t **src= s->framep[mb->ref_frame]->data; | |
1065 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64; | |
1066 s->dsp.prefetch(src[0]+off, s->linesize, 4); | |
1067 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64; | |
1068 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); | |
1069 } | |
1070 } | |
1071 | |
1057 /** | 1072 /** |
1058 * Apply motion vectors to prediction buffer, chapter 18. | 1073 * Apply motion vectors to prediction buffer, chapter 18. |
1059 */ | 1074 */ |
1060 static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, | 1075 static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, |
1061 int mb_x, int mb_y) | 1076 int mb_x, int mb_y) |
1062 { | 1077 { |
1063 int x_off = mb_x << 4, y_off = mb_y << 4; | 1078 int x_off = mb_x << 4, y_off = mb_y << 4; |
1064 int width = 16*s->mb_width, height = 16*s->mb_height; | 1079 int width = 16*s->mb_width, height = 16*s->mb_height; |
1080 | |
1081 prefetch_motion(s, mb, mb_x, mb_y, x_off, y_off, VP56_FRAME_PREVIOUS); | |
1065 | 1082 |
1066 if (mb->mode < VP8_MVMODE_SPLIT) { | 1083 if (mb->mode < VP8_MVMODE_SPLIT) { |
1067 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, | 1084 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, |
1068 0, 0, 16, 16, width, height, &mb->mv); | 1085 0, 0, 16, 16, width, height, &mb->mv); |
1069 } else switch (mb->partitioning) { | 1086 } else switch (mb->partitioning) { |
1135 0, 8, 8, 8, width, height, &mb->bmv[2]); | 1152 0, 8, 8, 8, width, height, &mb->bmv[2]); |
1136 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, | 1153 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, |
1137 8, 8, 8, 8, width, height, &mb->bmv[3]); | 1154 8, 8, 8, 8, width, height, &mb->bmv[3]); |
1138 break; | 1155 break; |
1139 } | 1156 } |
1157 | |
1158 prefetch_motion(s, mb, mb_x, mb_y, x_off, y_off, VP56_FRAME_GOLDEN); | |
1140 } | 1159 } |
1141 | 1160 |
1142 static void idct_mb(VP8Context *s, uint8_t *y_dst, uint8_t *u_dst, uint8_t *v_dst, | 1161 static void idct_mb(VP8Context *s, uint8_t *y_dst, uint8_t *u_dst, uint8_t *v_dst, |
1143 VP8Macroblock *mb) | 1162 VP8Macroblock *mb) |
1144 { | 1163 { |
1429 dst[i][y*curframe->linesize[i]-1] = 129; | 1448 dst[i][y*curframe->linesize[i]-1] = 129; |
1430 if (mb_y) | 1449 if (mb_y) |
1431 memset(s->top_border, 129, sizeof(*s->top_border)); | 1450 memset(s->top_border, 129, sizeof(*s->top_border)); |
1432 | 1451 |
1433 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | 1452 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { |
1453 /* Prefetch the current frame, 4 MBs ahead */ | |
1454 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); | |
1455 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); | |
1456 | |
1434 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4 + 4*mb_x); | 1457 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4 + 4*mb_x); |
1435 | 1458 |
1436 if (!mb->skip) | 1459 if (!mb->skip) |
1437 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); | 1460 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); |
1438 else { | 1461 else { |