comparison vp8.c @ 12221:45852dac8338 libavcodec

Avoid tracking i4x4 modes in P-frames in VP8. As in the previous commit, they aren't used for context selection in P-frames, so not tracking them saves memory.
author darkshikari
date Thu, 22 Jul 2010 07:04:45 +0000
parents 0f635b1f7861
children 7acdbfd2a222
comparison
equal deleted inserted replaced
12220:0f635b1f7861 12221:45852dac8338
107 * 1 -> dc-only (special transform) 107 * 1 -> dc-only (special transform)
108 * 2+-> full transform 108 * 2+-> full transform
109 */ 109 */
110 DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; 110 DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
111 DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; 111 DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
112 uint8_t intra4x4_pred_mode_mb[16];
112 113
113 int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock 114 int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock
114 115
115 int mbskip_enabled; 116 int mbskip_enabled;
116 int sign_bias[4]; ///< one state [0, 1] per ref frame type 117 int sign_bias[4]; ///< one state [0, 1] per ref frame type
668 } 669 }
669 670
670 static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, 671 static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4,
671 int stride, int keyframe) 672 int stride, int keyframe)
672 { 673 {
673 int x, y, t, l; 674 int x, y, t, l, i;
674 const uint8_t *ctx = vp8_pred4x4_prob_inter; 675
675 676 if (keyframe) {
676 for (y = 0; y < 4; y++) { 677 const uint8_t *ctx;
677 for (x = 0; x < 4; x++) { 678 for (y = 0; y < 4; y++) {
678 if (keyframe) { 679 for (x = 0; x < 4; x++) {
679 t = intra4x4[x - stride]; 680 t = intra4x4[x - stride];
680 l = intra4x4[x - 1]; 681 l = intra4x4[x - 1];
681 ctx = vp8_pred4x4_prob_intra[t][l]; 682 ctx = vp8_pred4x4_prob_intra[t][l];
683 intra4x4[x] = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
682 } 684 }
683 intra4x4[x] = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); 685 intra4x4 += stride;
684 } 686 }
685 intra4x4 += stride; 687 } else {
688 for (i = 0; i < 16; i++)
689 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
686 } 690 }
687 } 691 }
688 692
689 static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, 693 static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
690 uint8_t *intra4x4) 694 uint8_t *intra4x4)
751 } else { 755 } else {
752 // intra MB, 16.1 756 // intra MB, 16.1
753 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); 757 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
754 758
755 if (mb->mode == MODE_I4x4) 759 if (mb->mode == MODE_I4x4)
756 decode_intra4x4_modes(c, intra4x4, s->b4_stride, 0); 760 decode_intra4x4_modes(c, intra4x4, 4, 0);
757 761
758 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); 762 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
759 mb->ref_frame = VP56_FRAME_CURRENT; 763 mb->ref_frame = VP56_FRAME_CURRENT;
760 } 764 }
761 } 765 }
920 } 924 }
921 return mode; 925 return mode;
922 } 926 }
923 927
924 static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 928 static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
925 uint8_t *bmode, int mb_x, int mb_y) 929 uint8_t *intra4x4, int mb_x, int mb_y)
926 { 930 {
927 int x, y, mode, nnz, tr; 931 int x, y, mode, nnz, tr;
928 932
929 // for the first row, we need to run xchg_mb_border to init the top edge to 127 933 // for the first row, we need to run xchg_mb_border to init the top edge to 127
930 // otherwise, skip it if we aren't going to deblock 934 // otherwise, skip it if we aren't going to deblock
936 if (mb->mode < MODE_I4x4) { 940 if (mb->mode < MODE_I4x4) {
937 mode = check_intra_pred_mode(mb->mode, mb_x, mb_y); 941 mode = check_intra_pred_mode(mb->mode, mb_x, mb_y);
938 s->hpc.pred16x16[mode](dst[0], s->linesize); 942 s->hpc.pred16x16[mode](dst[0], s->linesize);
939 } else { 943 } else {
940 uint8_t *ptr = dst[0]; 944 uint8_t *ptr = dst[0];
945 int stride = s->keyframe ? s->b4_stride : 4;
941 946
942 // all blocks on the right edge of the macroblock use bottom edge 947 // all blocks on the right edge of the macroblock use bottom edge
943 // the top macroblock for their topright edge 948 // the top macroblock for their topright edge
944 uint8_t *tr_right = ptr - s->linesize + 16; 949 uint8_t *tr_right = ptr - s->linesize + 16;
945 950
954 uint8_t *topright = ptr + 4 - s->linesize; 959 uint8_t *topright = ptr + 4 - s->linesize;
955 for (x = 0; x < 4; x++) { 960 for (x = 0; x < 4; x++) {
956 if (x == 3) 961 if (x == 3)
957 topright = tr_right; 962 topright = tr_right;
958 963
959 s->hpc.pred4x4[bmode[x]](ptr+4*x, topright, s->linesize); 964 s->hpc.pred4x4[intra4x4[x]](ptr+4*x, topright, s->linesize);
960 965
961 nnz = s->non_zero_count_cache[y][x]; 966 nnz = s->non_zero_count_cache[y][x];
962 if (nnz) { 967 if (nnz) {
963 if (nnz == 1) 968 if (nnz == 1)
964 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize); 969 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
967 } 972 }
968 topright += 4; 973 topright += 4;
969 } 974 }
970 975
971 ptr += 4*s->linesize; 976 ptr += 4*s->linesize;
972 bmode += s->b4_stride; 977 intra4x4 += stride;
973 } 978 }
974 } 979 }
975 980
976 mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y); 981 mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y);
977 s->hpc.pred8x8[mode](dst[1], s->uvlinesize); 982 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1455 dst[i][y*curframe->linesize[i]-1] = 129; 1460 dst[i][y*curframe->linesize[i]-1] = 129;
1456 if (mb_y) 1461 if (mb_y)
1457 memset(s->top_border, 129, sizeof(*s->top_border)); 1462 memset(s->top_border, 129, sizeof(*s->top_border));
1458 1463
1459 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { 1464 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1465 uint8_t *intra4x4_mb = s->keyframe ? intra4x4 + 4*mb_x : s->intra4x4_pred_mode_mb;
1466
1460 /* Prefetch the current frame, 4 MBs ahead */ 1467 /* Prefetch the current frame, 4 MBs ahead */
1461 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); 1468 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1462 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); 1469 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1463 1470
1464 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4 + 4*mb_x); 1471 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4_mb);
1465 1472
1466 if (!mb->skip) 1473 if (!mb->skip)
1467 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); 1474 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
1468 else { 1475 else {
1469 AV_ZERO128(s->non_zero_count_cache); // luma 1476 AV_ZERO128(s->non_zero_count_cache); // luma
1470 AV_ZERO64(s->non_zero_count_cache[4]); // chroma 1477 AV_ZERO64(s->non_zero_count_cache[4]); // chroma
1471 } 1478 }
1472 1479
1473 if (mb->mode <= MODE_I4x4) { 1480 if (mb->mode <= MODE_I4x4) {
1474 intra_predict(s, dst, mb, intra4x4 + 4*mb_x, mb_x, mb_y); 1481 intra_predict(s, dst, mb, intra4x4_mb, mb_x, mb_y);
1475 memset(mb->bmv, 0, sizeof(mb->bmv)); 1482 memset(mb->bmv, 0, sizeof(mb->bmv));
1476 } else { 1483 } else {
1477 inter_predict(s, dst, mb, mb_x, mb_y); 1484 inter_predict(s, dst, mb, mb_x, mb_y);
1478 } 1485 }
1479 1486