Mercurial > libavcodec.hg
comparison vp8.c @ 12237:f0c4dc49c8f1 libavcodec
VP8: smarter prefetching
Don't prefetch reference frames that were used less than 1/32 of the time so
far in the frame.
This gives a speedup of up to ~2% on videos that, in many frames, make near-zero
(but not entirely zero) use of golden and/or alt-refs.
This is a very common property of videos encoded by libvpx.
author | darkshikari |
---|---|
date | Fri, 23 Jul 2010 01:59:56 +0000 |
parents | e08d65897115 |
children | 1a7903913e9b |
comparison
equal
deleted
inserted
replaced
12236:cabcd751b1e5 | 12237:f0c4dc49c8f1 |
---|---|
1082 s->put_pixels_tab[1 + (block_w == 4)]); | 1082 s->put_pixels_tab[1 + (block_w == 4)]); |
1083 } | 1083 } |
1084 | 1084 |
1085 /* Fetch pixels for estimated mv 4 macroblocks ahead. | 1085 /* Fetch pixels for estimated mv 4 macroblocks ahead. |
1086 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ | 1086 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ |
1087 static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int ref) | 1087 static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) |
1088 { | 1088 { |
1089 /* Don't prefetch refs that haven't been used yet this frame. */ | 1089 /* Don't prefetch refs that haven't been used very often this frame. */ |
1090 if (s->ref_count[ref-1]) { | 1090 if (s->ref_count[ref-1] > (mb_xy >> 5)) { |
1091 int x_off = mb_x << 4, y_off = mb_y << 4; | 1091 int x_off = mb_x << 4, y_off = mb_y << 4; |
1092 int mx = mb->mv.x + x_off + 8; | 1092 int mx = mb->mv.x + x_off + 8; |
1093 int my = mb->mv.y + y_off; | 1093 int my = mb->mv.y + y_off; |
1094 uint8_t **src= s->framep[ref]->data; | 1094 uint8_t **src= s->framep[ref]->data; |
1095 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64; | 1095 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64; |
1469 for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | 1469 for (mb_y = 0; mb_y < s->mb_height; mb_y++) { |
1470 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; | 1470 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; |
1471 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; | 1471 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; |
1472 uint8_t *intra4x4 = s->intra4x4_pred_mode + 4*mb_y*s->b4_stride; | 1472 uint8_t *intra4x4 = s->intra4x4_pred_mode + 4*mb_y*s->b4_stride; |
1473 uint8_t *segment_map = s->segmentation_map + mb_y*s->mb_stride; | 1473 uint8_t *segment_map = s->segmentation_map + mb_y*s->mb_stride; |
1474 int mb_xy = mb_y * s->mb_stride; | |
1474 uint8_t *dst[3] = { | 1475 uint8_t *dst[3] = { |
1475 curframe->data[0] + 16*mb_y*s->linesize, | 1476 curframe->data[0] + 16*mb_y*s->linesize, |
1476 curframe->data[1] + 8*mb_y*s->uvlinesize, | 1477 curframe->data[1] + 8*mb_y*s->uvlinesize, |
1477 curframe->data[2] + 8*mb_y*s->uvlinesize | 1478 curframe->data[2] + 8*mb_y*s->uvlinesize |
1478 }; | 1479 }; |
1485 for (y = 0; y < 16>>!!i; y++) | 1486 for (y = 0; y < 16>>!!i; y++) |
1486 dst[i][y*curframe->linesize[i]-1] = 129; | 1487 dst[i][y*curframe->linesize[i]-1] = 129; |
1487 if (mb_y) | 1488 if (mb_y) |
1488 memset(s->top_border, 129, sizeof(*s->top_border)); | 1489 memset(s->top_border, 129, sizeof(*s->top_border)); |
1489 | 1490 |
1490 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | 1491 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { |
1491 uint8_t *intra4x4_mb = s->keyframe ? intra4x4 + 4*mb_x : s->intra4x4_pred_mode_mb; | 1492 uint8_t *intra4x4_mb = s->keyframe ? intra4x4 + 4*mb_x : s->intra4x4_pred_mode_mb; |
1492 uint8_t *segment_mb = segment_map+mb_x; | 1493 uint8_t *segment_mb = segment_map+mb_x; |
1493 | 1494 |
1494 /* Prefetch the current frame, 4 MBs ahead */ | 1495 /* Prefetch the current frame, 4 MBs ahead */ |
1495 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); | 1496 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); |
1496 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); | 1497 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); |
1497 | 1498 |
1498 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4_mb, segment_mb); | 1499 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4_mb, segment_mb); |
1499 | 1500 |
1500 prefetch_motion(s, mb, mb_x, mb_y, VP56_FRAME_PREVIOUS); | 1501 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); |
1501 | 1502 |
1502 if (!mb->skip) | 1503 if (!mb->skip) |
1503 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); | 1504 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); |
1504 | 1505 |
1505 if (mb->mode <= MODE_I4x4) | 1506 if (mb->mode <= MODE_I4x4) |
1506 intra_predict(s, dst, mb, intra4x4_mb, mb_x, mb_y); | 1507 intra_predict(s, dst, mb, intra4x4_mb, mb_x, mb_y); |
1507 else | 1508 else |
1508 inter_predict(s, dst, mb, mb_x, mb_y); | 1509 inter_predict(s, dst, mb, mb_x, mb_y); |
1509 | 1510 |
1510 prefetch_motion(s, mb, mb_x, mb_y, VP56_FRAME_GOLDEN); | 1511 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); |
1511 | 1512 |
1512 if (!mb->skip) { | 1513 if (!mb->skip) { |
1513 idct_mb(s, dst[0], dst[1], dst[2], mb); | 1514 idct_mb(s, dst[0], dst[1], dst[2], mb); |
1514 } else { | 1515 } else { |
1515 AV_ZERO64(s->left_nnz); | 1516 AV_ZERO64(s->left_nnz); |
1523 } | 1524 } |
1524 | 1525 |
1525 if (s->deblock_filter) | 1526 if (s->deblock_filter) |
1526 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]); | 1527 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]); |
1527 | 1528 |
1528 prefetch_motion(s, mb, mb_x, mb_y, VP56_FRAME_GOLDEN2); | 1529 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); |
1529 | 1530 |
1530 dst[0] += 16; | 1531 dst[0] += 16; |
1531 dst[1] += 8; | 1532 dst[1] += 8; |
1532 dst[2] += 8; | 1533 dst[2] += 8; |
1533 mb++; | |
1534 } | 1534 } |
1535 if (s->deblock_filter) { | 1535 if (s->deblock_filter) { |
1536 if (s->filter.simple) | 1536 if (s->filter.simple) |
1537 filter_mb_row_simple(s, mb_y); | 1537 filter_mb_row_simple(s, mb_y); |
1538 else | 1538 else |