Mercurial > libavcodec.hg
comparison vp8.c @ 12221:45852dac8338 libavcodec
Avoid tracking i4x4 modes in P-frames in VP8
As in the previous commit, they aren't used for context selection, so it saves
memory this way.
author | darkshikari |
---|---|
date | Thu, 22 Jul 2010 07:04:45 +0000 |
parents | 0f635b1f7861 |
children | 7acdbfd2a222 |
comparison
equal
deleted
inserted
replaced
12220:0f635b1f7861 | 12221:45852dac8338 |
---|---|
107 * 1 -> dc-only (special transform) | 107 * 1 -> dc-only (special transform) |
108 * 2+-> full transform | 108 * 2+-> full transform |
109 */ | 109 */ |
110 DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; | 110 DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; |
111 DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; | 111 DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; |
 | 112 uint8_t intra4x4_pred_mode_mb[16]; |
112 | 113 |
113 int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock | 114 int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock |
114 | 115 |
115 int mbskip_enabled; | 116 int mbskip_enabled; |
116 int sign_bias[4]; ///< one state [0, 1] per ref frame type | 117 int sign_bias[4]; ///< one state [0, 1] per ref frame type |
668 } | 669 } |
669 | 670 |
670 static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, | 671 static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, |
671 int stride, int keyframe) | 672 int stride, int keyframe) |
672 { | 673 { |
673 int x, y, t, l; | 674 int x, y, t, l, i; |
674 const uint8_t *ctx = vp8_pred4x4_prob_inter; | 675 |
675 | 676 if (keyframe) { |
676 for (y = 0; y < 4; y++) { | 677 const uint8_t *ctx; |
677 for (x = 0; x < 4; x++) { | 678 for (y = 0; y < 4; y++) { |
678 if (keyframe) { | 679 for (x = 0; x < 4; x++) { |
679 t = intra4x4[x - stride]; | 680 t = intra4x4[x - stride]; |
680 l = intra4x4[x - 1]; | 681 l = intra4x4[x - 1]; |
681 ctx = vp8_pred4x4_prob_intra[t][l]; | 682 ctx = vp8_pred4x4_prob_intra[t][l]; |
 | 683 intra4x4[x] = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); |
682 } | 684 } |
683 intra4x4[x] = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); | 685 intra4x4 += stride; |
684 } | 686 } |
685 intra4x4 += stride; | 687 } else { |
 | 688 for (i = 0; i < 16; i++) |
 | 689 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); |
686 } | 690 } |
687 } | 691 } |
688 | 692 |
689 static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, | 693 static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, |
690 uint8_t *intra4x4) | 694 uint8_t *intra4x4) |
751 } else { | 755 } else { |
752 // intra MB, 16.1 | 756 // intra MB, 16.1 |
753 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); | 757 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); |
754 | 758 |
755 if (mb->mode == MODE_I4x4) | 759 if (mb->mode == MODE_I4x4) |
756 decode_intra4x4_modes(c, intra4x4, s->b4_stride, 0); | 760 decode_intra4x4_modes(c, intra4x4, 4, 0); |
757 | 761 |
758 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); | 762 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); |
759 mb->ref_frame = VP56_FRAME_CURRENT; | 763 mb->ref_frame = VP56_FRAME_CURRENT; |
760 } | 764 } |
761 } | 765 } |
920 } | 924 } |
921 return mode; | 925 return mode; |
922 } | 926 } |
923 | 927 |
924 static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, | 928 static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, |
925 uint8_t *bmode, int mb_x, int mb_y) | 929 uint8_t *intra4x4, int mb_x, int mb_y) |
926 { | 930 { |
927 int x, y, mode, nnz, tr; | 931 int x, y, mode, nnz, tr; |
928 | 932 |
929 // for the first row, we need to run xchg_mb_border to init the top edge to 127 | 933 // for the first row, we need to run xchg_mb_border to init the top edge to 127 |
930 // otherwise, skip it if we aren't going to deblock | 934 // otherwise, skip it if we aren't going to deblock |
936 if (mb->mode < MODE_I4x4) { | 940 if (mb->mode < MODE_I4x4) { |
937 mode = check_intra_pred_mode(mb->mode, mb_x, mb_y); | 941 mode = check_intra_pred_mode(mb->mode, mb_x, mb_y); |
938 s->hpc.pred16x16[mode](dst[0], s->linesize); | 942 s->hpc.pred16x16[mode](dst[0], s->linesize); |
939 } else { | 943 } else { |
940 uint8_t *ptr = dst[0]; | 944 uint8_t *ptr = dst[0]; |
 | 945 int stride = s->keyframe ? s->b4_stride : 4; |
941 | 946 |
942 // all blocks on the right edge of the macroblock use bottom edge | 947 // all blocks on the right edge of the macroblock use bottom edge |
943 // the top macroblock for their topright edge | 948 // the top macroblock for their topright edge |
944 uint8_t *tr_right = ptr - s->linesize + 16; | 949 uint8_t *tr_right = ptr - s->linesize + 16; |
945 | 950 |
954 uint8_t *topright = ptr + 4 - s->linesize; | 959 uint8_t *topright = ptr + 4 - s->linesize; |
955 for (x = 0; x < 4; x++) { | 960 for (x = 0; x < 4; x++) { |
956 if (x == 3) | 961 if (x == 3) |
957 topright = tr_right; | 962 topright = tr_right; |
958 | 963 |
959 s->hpc.pred4x4[bmode[x]](ptr+4*x, topright, s->linesize); | 964 s->hpc.pred4x4[intra4x4[x]](ptr+4*x, topright, s->linesize); |
960 | 965 |
961 nnz = s->non_zero_count_cache[y][x]; | 966 nnz = s->non_zero_count_cache[y][x]; |
962 if (nnz) { | 967 if (nnz) { |
963 if (nnz == 1) | 968 if (nnz == 1) |
964 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize); | 969 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize); |
967 } | 972 } |
968 topright += 4; | 973 topright += 4; |
969 } | 974 } |
970 | 975 |
971 ptr += 4*s->linesize; | 976 ptr += 4*s->linesize; |
972 bmode += s->b4_stride; | 977 intra4x4 += stride; |
973 } | 978 } |
974 } | 979 } |
975 | 980 |
976 mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y); | 981 mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y); |
977 s->hpc.pred8x8[mode](dst[1], s->uvlinesize); | 982 s->hpc.pred8x8[mode](dst[1], s->uvlinesize); |
1455 dst[i][y*curframe->linesize[i]-1] = 129; | 1460 dst[i][y*curframe->linesize[i]-1] = 129; |
1456 if (mb_y) | 1461 if (mb_y) |
1457 memset(s->top_border, 129, sizeof(*s->top_border)); | 1462 memset(s->top_border, 129, sizeof(*s->top_border)); |
1458 | 1463 |
1459 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | 1464 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { |
 | 1465 uint8_t *intra4x4_mb = s->keyframe ? intra4x4 + 4*mb_x : s->intra4x4_pred_mode_mb; |
 | 1466 |
1460 /* Prefetch the current frame, 4 MBs ahead */ | 1467 /* Prefetch the current frame, 4 MBs ahead */ |
1461 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); | 1468 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); |
1462 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); | 1469 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); |
1463 | 1470 |
1464 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4 + 4*mb_x); | 1471 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4_mb); |
1465 | 1472 |
1466 if (!mb->skip) | 1473 if (!mb->skip) |
1467 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); | 1474 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); |
1468 else { | 1475 else { |
1469 AV_ZERO128(s->non_zero_count_cache); // luma | 1476 AV_ZERO128(s->non_zero_count_cache); // luma |
1470 AV_ZERO64(s->non_zero_count_cache[4]); // chroma | 1477 AV_ZERO64(s->non_zero_count_cache[4]); // chroma |
1471 } | 1478 } |
1472 | 1479 |
1473 if (mb->mode <= MODE_I4x4) { | 1480 if (mb->mode <= MODE_I4x4) { |
1474 intra_predict(s, dst, mb, intra4x4 + 4*mb_x, mb_x, mb_y); | 1481 intra_predict(s, dst, mb, intra4x4_mb, mb_x, mb_y); |
1475 memset(mb->bmv, 0, sizeof(mb->bmv)); | 1482 memset(mb->bmv, 0, sizeof(mb->bmv)); |
1476 } else { | 1483 } else { |
1477 inter_predict(s, dst, mb, mb_x, mb_y); | 1484 inter_predict(s, dst, mb, mb_x, mb_y); |
1478 } | 1485 } |
1479 | 1486 |