comparison vp8.c @ 12339:57fc7f2d7b28 libavcodec

Only store intra prediction modes on the boundary (a top row and a 4-entry left column) for keyframes, not as a full plane. Inter-frame behaviour is unchanged.
author skal
date Mon, 02 Aug 2010 09:44:53 +0000
parents e84378ff89ca
children 2d15f62f4f8a
comparison
equal deleted inserted replaced
12338:d62e974e36af 12339:57fc7f2d7b28
86 VP8Macroblock *macroblocks; 86 VP8Macroblock *macroblocks;
87 VP8Macroblock *macroblocks_base; 87 VP8Macroblock *macroblocks_base;
88 VP8FilterStrength *filter_strength; 88 VP8FilterStrength *filter_strength;
89 int mb_stride; 89 int mb_stride;
90 90
91 uint8_t *intra4x4_pred_mode; 91 uint8_t *intra4x4_pred_mode_top;
92 uint8_t *intra4x4_pred_mode_base; 92 uint8_t intra4x4_pred_mode_left[4];
93 uint8_t *segmentation_map; 93 uint8_t *segmentation_map;
94 int b4_stride; 94 int b4_stride;
95 95
96 /** 96 /**
97 * Cache of the top row needed for intra prediction 97 * Cache of the top row needed for intra prediction
209 avctx->release_buffer(avctx, &s->frames[i]); 209 avctx->release_buffer(avctx, &s->frames[i]);
210 memset(s->framep, 0, sizeof(s->framep)); 210 memset(s->framep, 0, sizeof(s->framep));
211 211
212 av_freep(&s->macroblocks_base); 212 av_freep(&s->macroblocks_base);
213 av_freep(&s->filter_strength); 213 av_freep(&s->filter_strength);
214 av_freep(&s->intra4x4_pred_mode_base); 214 av_freep(&s->intra4x4_pred_mode_top);
215 av_freep(&s->top_nnz); 215 av_freep(&s->top_nnz);
216 av_freep(&s->edge_emu_buffer); 216 av_freep(&s->edge_emu_buffer);
217 av_freep(&s->top_border); 217 av_freep(&s->top_border);
218 av_freep(&s->segmentation_map); 218 av_freep(&s->segmentation_map);
219 219
220 s->macroblocks = NULL; 220 s->macroblocks = NULL;
221 s->intra4x4_pred_mode = NULL;
222 } 221 }
223 222
224 static int update_dimensions(VP8Context *s, int width, int height) 223 static int update_dimensions(VP8Context *s, int width, int height)
225 { 224 {
226 int i;
227
228 if (avcodec_check_dimensions(s->avctx, width, height)) 225 if (avcodec_check_dimensions(s->avctx, width, height))
229 return AVERROR_INVALIDDATA; 226 return AVERROR_INVALIDDATA;
230 227
231 vp8_decode_flush(s->avctx); 228 vp8_decode_flush(s->avctx);
232 229
240 s->mb_stride = s->mb_width+1; 237 s->mb_stride = s->mb_width+1;
241 s->b4_stride = 4*s->mb_stride; 238 s->b4_stride = 4*s->mb_stride;
242 239
243 s->macroblocks_base = av_mallocz((s->mb_stride+s->mb_height*2+2)*sizeof(*s->macroblocks)); 240 s->macroblocks_base = av_mallocz((s->mb_stride+s->mb_height*2+2)*sizeof(*s->macroblocks));
244 s->filter_strength = av_mallocz(s->mb_stride*sizeof(*s->filter_strength)); 241 s->filter_strength = av_mallocz(s->mb_stride*sizeof(*s->filter_strength));
245 s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1)); 242 s->intra4x4_pred_mode_top = av_mallocz(s->b4_stride*4);
246 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); 243 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
247 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); 244 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
248 s->segmentation_map = av_mallocz(s->mb_stride*s->mb_height); 245 s->segmentation_map = av_mallocz(s->mb_stride*s->mb_height);
249 246
250 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_base || 247 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
251 !s->top_nnz || !s->top_border || !s->segmentation_map) 248 !s->top_nnz || !s->top_border || !s->segmentation_map)
252 return AVERROR(ENOMEM); 249 return AVERROR(ENOMEM);
253 250
254 s->macroblocks = s->macroblocks_base + 1; 251 s->macroblocks = s->macroblocks_base + 1;
255 s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride;
256
257 memset(s->intra4x4_pred_mode_base, DC_PRED, s->b4_stride);
258 for (i = 0; i < 4*s->mb_height; i++)
259 s->intra4x4_pred_mode[i*s->b4_stride-1] = DC_PRED;
260 252
261 return 0; 253 return 0;
262 } 254 }
263 255
264 static void parse_segment_info(VP8Context *s) 256 static void parse_segment_info(VP8Context *s)
691 683
692 return num; 684 return num;
693 } 685 }
694 686
695 static av_always_inline 687 static av_always_inline
696 void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, 688 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
697 int stride, int keyframe) 689 int mb_x, int keyframe)
698 { 690 {
699 int x, y, t, l, i; 691 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
700
701 if (keyframe) { 692 if (keyframe) {
702 const uint8_t *ctx; 693 int x, y;
694 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
695 uint8_t* const left = s->intra4x4_pred_mode_left;
703 for (y = 0; y < 4; y++) { 696 for (y = 0; y < 4; y++) {
704 for (x = 0; x < 4; x++) { 697 for (x = 0; x < 4; x++) {
705 t = intra4x4[x - stride]; 698 const uint8_t *ctx;
706 l = intra4x4[x - 1]; 699 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
707 ctx = vp8_pred4x4_prob_intra[t][l]; 700 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
708 intra4x4[x] = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); 701 left[y] = top[x] = *intra4x4;
702 intra4x4++;
709 } 703 }
710 intra4x4 += stride;
711 } 704 }
712 } else { 705 } else {
706 int i;
713 for (i = 0; i < 16; i++) 707 for (i = 0; i < 16; i++)
714 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); 708 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
715 } 709 }
716 } 710 }
717 711
718 static av_always_inline 712 static av_always_inline
719 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, 713 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment)
720 uint8_t *intra4x4, uint8_t *segment)
721 { 714 {
722 VP56RangeCoder *c = &s->c; 715 VP56RangeCoder *c = &s->c;
723 716
724 if (s->segmentation.update_map) 717 if (s->segmentation.update_map)
725 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); 718 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
729 722
730 if (s->keyframe) { 723 if (s->keyframe) {
731 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); 724 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
732 725
733 if (mb->mode == MODE_I4x4) { 726 if (mb->mode == MODE_I4x4) {
734 decode_intra4x4_modes(c, intra4x4, s->b4_stride, 1); 727 decode_intra4x4_modes(s, c, mb_x, 1);
735 } else 728 } else {
736 fill_rectangle(intra4x4, 4, 4, s->b4_stride, vp8_pred4x4_mode[mb->mode], 1); 729 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
730 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
731 AV_WN32A(s->intra4x4_pred_mode_left, modes);
732 }
737 733
738 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); 734 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
739 mb->ref_frame = VP56_FRAME_CURRENT; 735 mb->ref_frame = VP56_FRAME_CURRENT;
740 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { 736 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
741 VP56mv near[2], best; 737 VP56mv near[2], best;
784 } else { 780 } else {
785 // intra MB, 16.1 781 // intra MB, 16.1
786 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); 782 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
787 783
788 if (mb->mode == MODE_I4x4) 784 if (mb->mode == MODE_I4x4)
789 decode_intra4x4_modes(c, intra4x4, 4, 0); 785 decode_intra4x4_modes(s, c, mb_x, 0);
790 786
791 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); 787 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
792 mb->ref_frame = VP56_FRAME_CURRENT; 788 mb->ref_frame = VP56_FRAME_CURRENT;
793 mb->partitioning = VP8_SPLITMVMODE_NONE; 789 mb->partitioning = VP8_SPLITMVMODE_NONE;
794 AV_ZERO32(&mb->bmv[0]); 790 AV_ZERO32(&mb->bmv[0]);
976 return mode; 972 return mode;
977 } 973 }
978 974
979 static av_always_inline 975 static av_always_inline
980 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 976 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
981 uint8_t *intra4x4, int mb_x, int mb_y) 977 int mb_x, int mb_y)
982 { 978 {
983 int x, y, mode, nnz, tr; 979 int x, y, mode, nnz, tr;
984 980
985 // for the first row, we need to run xchg_mb_border to init the top edge to 127 981 // for the first row, we need to run xchg_mb_border to init the top edge to 127
986 // otherwise, skip it if we aren't going to deblock 982 // otherwise, skip it if we aren't going to deblock
992 if (mb->mode < MODE_I4x4) { 988 if (mb->mode < MODE_I4x4) {
993 mode = check_intra_pred_mode(mb->mode, mb_x, mb_y); 989 mode = check_intra_pred_mode(mb->mode, mb_x, mb_y);
994 s->hpc.pred16x16[mode](dst[0], s->linesize); 990 s->hpc.pred16x16[mode](dst[0], s->linesize);
995 } else { 991 } else {
996 uint8_t *ptr = dst[0]; 992 uint8_t *ptr = dst[0];
997 int stride = s->keyframe ? s->b4_stride : 4; 993 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
998 994
999 // all blocks on the right edge of the macroblock use bottom edge 995 // all blocks on the right edge of the macroblock use bottom edge
1000 // the top macroblock for their topright edge 996 // the top macroblock for their topright edge
1001 uint8_t *tr_right = ptr - s->linesize + 16; 997 uint8_t *tr_right = ptr - s->linesize + 16;
1002 998
1027 } 1023 }
1028 topright += 4; 1024 topright += 4;
1029 } 1025 }
1030 1026
1031 ptr += 4*s->linesize; 1027 ptr += 4*s->linesize;
1032 intra4x4 += stride; 1028 intra4x4 += 4;
1033 } 1029 }
1034 } 1030 }
1035 1031
1036 mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y); 1032 mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y);
1037 s->hpc.pred8x8[mode](dst[1], s->uvlinesize); 1033 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1514 memset(s->macroblocks, 0, (s->mb_width + s->mb_height*2)*sizeof(*s->macroblocks)); 1510 memset(s->macroblocks, 0, (s->mb_width + s->mb_height*2)*sizeof(*s->macroblocks));
1515 1511
1516 // top edge of 127 for intra prediction 1512 // top edge of 127 for intra prediction
1517 memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border)); 1513 memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border));
1518 memset(s->ref_count, 0, sizeof(s->ref_count)); 1514 memset(s->ref_count, 0, sizeof(s->ref_count));
1515 if (s->keyframe)
1516 memset(s->intra4x4_pred_mode_top, DC_PRED, s->b4_stride*4);
1519 1517
1520 for (mb_y = 0; mb_y < s->mb_height; mb_y++) { 1518 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1521 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; 1519 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1522 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; 1520 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1523 uint8_t *intra4x4 = s->intra4x4_pred_mode + 4*mb_y*s->b4_stride;
1524 uint8_t *segment_map = s->segmentation_map + mb_y*s->mb_stride; 1521 uint8_t *segment_map = s->segmentation_map + mb_y*s->mb_stride;
1525 int mb_xy = mb_y * s->mb_stride; 1522 int mb_xy = mb_y * s->mb_stride;
1526 uint8_t *dst[3] = { 1523 uint8_t *dst[3] = {
1527 curframe->data[0] + 16*mb_y*s->linesize, 1524 curframe->data[0] + 16*mb_y*s->linesize,
1528 curframe->data[1] + 8*mb_y*s->uvlinesize, 1525 curframe->data[1] + 8*mb_y*s->uvlinesize,
1529 curframe->data[2] + 8*mb_y*s->uvlinesize 1526 curframe->data[2] + 8*mb_y*s->uvlinesize
1530 }; 1527 };
1531 1528
1532 memset(s->left_nnz, 0, sizeof(s->left_nnz)); 1529 memset(s->left_nnz, 0, sizeof(s->left_nnz));
1530 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1533 1531
1534 // left edge of 129 for intra prediction 1532 // left edge of 129 for intra prediction
1535 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) 1533 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE))
1536 for (i = 0; i < 3; i++) 1534 for (i = 0; i < 3; i++)
1537 for (y = 0; y < 16>>!!i; y++) 1535 for (y = 0; y < 16>>!!i; y++)
1538 dst[i][y*curframe->linesize[i]-1] = 129; 1536 dst[i][y*curframe->linesize[i]-1] = 129;
1539 if (mb_y) 1537 if (mb_y)
1540 memset(s->top_border, 129, sizeof(*s->top_border)); 1538 memset(s->top_border, 129, sizeof(*s->top_border));
1541 1539
1542 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { 1540 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1543 uint8_t *intra4x4_mb = s->keyframe ? intra4x4 + 4*mb_x : s->intra4x4_pred_mode_mb;
1544 uint8_t *segment_mb = segment_map+mb_x; 1541 uint8_t *segment_mb = segment_map+mb_x;
1545 1542
1546 /* Prefetch the current frame, 4 MBs ahead */ 1543 /* Prefetch the current frame, 4 MBs ahead */
1547 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); 1544 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1548 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); 1545 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1549 1546
1550 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4_mb, segment_mb); 1547 decode_mb_mode(s, mb, mb_x, mb_y, segment_mb);
1551 1548
1552 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); 1549 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1553 1550
1554 if (!mb->skip) 1551 if (!mb->skip)
1555 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); 1552 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
1556 1553
1557 if (mb->mode <= MODE_I4x4) 1554 if (mb->mode <= MODE_I4x4)
1558 intra_predict(s, dst, mb, intra4x4_mb, mb_x, mb_y); 1555 intra_predict(s, dst, mb, mb_x, mb_y);
1559 else 1556 else
1560 inter_predict(s, dst, mb, mb_x, mb_y); 1557 inter_predict(s, dst, mb, mb_x, mb_y);
1561 1558
1562 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); 1559 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1563 1560