comparison vp8.c @ 12223:93e27a5401de libavcodec

Convert VP8 macroblock structures to a ring buffer. This uses a slightly nonintuitive ring buffer size of (width+height*2) to simplify the addressing logic. Also split the segmentation map out into a separate structure, which is necessary to implement the ring buffer.
author darkshikari
date Thu, 22 Jul 2010 11:45:18 +0000
parents 7acdbfd2a222
children 5b7d690b761b
comparison
equal deleted inserted replaced
12222:7acdbfd2a222 12223:93e27a5401de
29 #include "rectangle.h" 29 #include "rectangle.h"
30 30
31 typedef struct { 31 typedef struct {
32 uint8_t filter_level; 32 uint8_t filter_level;
33 uint8_t inner_limit; 33 uint8_t inner_limit;
34 uint8_t inner_filter;
34 } VP8FilterStrength; 35 } VP8FilterStrength;
35 36
36 typedef struct { 37 typedef struct {
37 uint8_t segment; 38 uint8_t segment;
38 uint8_t skip; 39 uint8_t skip;
87 VP8FilterStrength *filter_strength; 88 VP8FilterStrength *filter_strength;
88 int mb_stride; 89 int mb_stride;
89 90
90 uint8_t *intra4x4_pred_mode; 91 uint8_t *intra4x4_pred_mode;
91 uint8_t *intra4x4_pred_mode_base; 92 uint8_t *intra4x4_pred_mode_base;
93 uint8_t *segmentation_map;
92 int b4_stride; 94 int b4_stride;
93 95
94 /** 96 /**
95 * Cache of the top row needed for intra prediction 97 * Cache of the top row needed for intra prediction
96 * 16 for luma, 8 for each chroma plane 98 * 16 for luma, 8 for each chroma plane
210 av_freep(&s->macroblocks_base); 212 av_freep(&s->macroblocks_base);
211 av_freep(&s->intra4x4_pred_mode_base); 213 av_freep(&s->intra4x4_pred_mode_base);
212 av_freep(&s->top_nnz); 214 av_freep(&s->top_nnz);
213 av_freep(&s->edge_emu_buffer); 215 av_freep(&s->edge_emu_buffer);
214 av_freep(&s->top_border); 216 av_freep(&s->top_border);
217 av_freep(&s->segmentation_map);
215 218
216 s->macroblocks = NULL; 219 s->macroblocks = NULL;
217 s->intra4x4_pred_mode = NULL; 220 s->intra4x4_pred_mode = NULL;
218 } 221 }
219 222
234 // we allocate a border around the top/left of intra4x4 modes 237 // we allocate a border around the top/left of intra4x4 modes
235 // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle 238 // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle
236 s->mb_stride = s->mb_width+1; 239 s->mb_stride = s->mb_width+1;
237 s->b4_stride = 4*s->mb_stride; 240 s->b4_stride = 4*s->mb_stride;
238 241
239 s->macroblocks_base = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks)); 242 s->macroblocks_base = av_mallocz((s->mb_stride+s->mb_height*2+2)*sizeof(*s->macroblocks));
240 s->filter_strength = av_mallocz(s->mb_stride*sizeof(*s->filter_strength)); 243 s->filter_strength = av_mallocz(s->mb_stride*sizeof(*s->filter_strength));
241 s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1)); 244 s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1));
242 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); 245 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
243 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); 246 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
244 247 s->segmentation_map = av_mallocz(s->mb_stride*s->mb_height);
245 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_base || !s->top_nnz || !s->top_border) 248
249 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_base ||
250 !s->top_nnz || !s->top_border || !s->segmentation_map)
246 return AVERROR(ENOMEM); 251 return AVERROR(ENOMEM);
247 252
248 s->macroblocks = s->macroblocks_base + 1 + s->mb_stride; 253 s->macroblocks = s->macroblocks_base + 1;
249 s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride; 254 s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride;
250 255
251 memset(s->intra4x4_pred_mode_base, DC_PRED, s->b4_stride); 256 memset(s->intra4x4_pred_mode_base, DC_PRED, s->b4_stride);
252 for (i = 0; i < 4*s->mb_height; i++) 257 for (i = 0; i < 4*s->mb_height; i++)
253 s->intra4x4_pred_mode[i*s->b4_stride-1] = DC_PRED; 258 s->intra4x4_pred_mode[i*s->b4_stride-1] = DC_PRED;
528 } 533 }
529 534
530 static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, 535 static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
531 VP56mv near[2], VP56mv *best, uint8_t cnt[4]) 536 VP56mv near[2], VP56mv *best, uint8_t cnt[4])
532 { 537 {
533 int mb_stride = s->mb_stride; 538 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
534 VP8Macroblock *mb_edge[3] = { mb - mb_stride /* top */, 539 mb - 1 /* left */,
535 mb - 1 /* left */, 540 mb + 1 /* top-left */ };
536 mb - mb_stride - 1 /* top-left */ };
537 enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; 541 enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };
538 VP56mv near_mv[4] = {{ 0 }}; 542 VP56mv near_mv[4] = {{ 0 }};
539 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; 543 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
540 int idx = CNT_ZERO; 544 int idx = CNT_ZERO;
541 int best_idx = CNT_ZERO; 545 int best_idx = CNT_ZERO;
627 VP8Macroblock *mb, VP56mv *base_mv) 631 VP8Macroblock *mb, VP56mv *base_mv)
628 { 632 {
629 int part_idx = mb->partitioning = 633 int part_idx = mb->partitioning =
630 vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); 634 vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob);
631 int n, num = vp8_mbsplit_count[part_idx]; 635 int n, num = vp8_mbsplit_count[part_idx];
632 VP8Macroblock *top_mb = &mb[-s->mb_stride]; 636 VP8Macroblock *top_mb = &mb[2];
633 VP8Macroblock *left_mb = &mb[-1]; 637 VP8Macroblock *left_mb = &mb[-1];
634 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning], 638 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
635 *mbsplits_top = vp8_mbsplits[top_mb->partitioning], 639 *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
636 *mbsplits_cur = vp8_mbsplits[part_idx], 640 *mbsplits_cur = vp8_mbsplits[part_idx],
637 *firstidx = vp8_mbfirstidx[part_idx]; 641 *firstidx = vp8_mbfirstidx[part_idx];
638 VP56mv *top_mv = top_mb->bmv; 642 VP56mv *top_mv = top_mb->bmv;
639 VP56mv *left_mv = left_mb->bmv; 643 VP56mv *left_mv = left_mb->bmv;
640 VP56mv *cur_mv = mb->bmv; 644 VP56mv *cur_mv = mb->bmv;
641 645
642 for (n = 0; n < num; n++) { 646 for (n = 0; n < num; n++) {
643 int k = firstidx[n]; 647 int k = firstidx[n];
644 uint32_t left, above; 648 uint32_t left, above;
645 const uint8_t *submv_prob; 649 const uint8_t *submv_prob;
696 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); 700 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
697 } 701 }
698 } 702 }
699 703
700 static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, 704 static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
701 uint8_t *intra4x4) 705 uint8_t *intra4x4, uint8_t *segment)
702 { 706 {
703 VP56RangeCoder *c = &s->c; 707 VP56RangeCoder *c = &s->c;
704 708
705 if (s->segmentation.update_map) 709 if (s->segmentation.update_map)
706 mb->segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); 710 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
711 mb->segment = *segment;
707 712
708 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0; 713 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
709 714
710 if (s->keyframe) { 715 if (s->keyframe) {
711 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); 716 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
1254 } 1259 }
1255 interior_limit = FFMAX(interior_limit, 1); 1260 interior_limit = FFMAX(interior_limit, 1);
1256 1261
1257 f->filter_level = filter_level; 1262 f->filter_level = filter_level;
1258 f->inner_limit = interior_limit; 1263 f->inner_limit = interior_limit;
1259 } 1264 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1260 1265 }
1261 static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, VP8FilterStrength *f, int mb_x, int mb_y) 1266
1267 static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1262 { 1268 {
1263 int mbedge_lim, bedge_lim, hev_thresh; 1269 int mbedge_lim, bedge_lim, hev_thresh;
1264 int filter_level = f->filter_level; 1270 int filter_level = f->filter_level;
1265 int inner_limit = f->inner_limit; 1271 int inner_limit = f->inner_limit;
1272 int inner_filter = f->inner_filter;
1266 1273
1267 if (!filter_level) 1274 if (!filter_level)
1268 return; 1275 return;
1269 1276
1270 mbedge_lim = 2*(filter_level+2) + inner_limit; 1277 mbedge_lim = 2*(filter_level+2) + inner_limit;
1286 mbedge_lim, inner_limit, hev_thresh); 1293 mbedge_lim, inner_limit, hev_thresh);
1287 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], s->uvlinesize, 1294 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], s->uvlinesize,
1288 mbedge_lim, inner_limit, hev_thresh); 1295 mbedge_lim, inner_limit, hev_thresh);
1289 } 1296 }
1290 1297
1291 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { 1298 if (inner_filter) {
1292 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, s->linesize, bedge_lim, 1299 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, s->linesize, bedge_lim,
1293 inner_limit, hev_thresh); 1300 inner_limit, hev_thresh);
1294 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, s->linesize, bedge_lim, 1301 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, s->linesize, bedge_lim,
1295 inner_limit, hev_thresh); 1302 inner_limit, hev_thresh);
1296 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, s->linesize, bedge_lim, 1303 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, s->linesize, bedge_lim,
1305 mbedge_lim, inner_limit, hev_thresh); 1312 mbedge_lim, inner_limit, hev_thresh);
1306 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], s->uvlinesize, 1313 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], s->uvlinesize,
1307 mbedge_lim, inner_limit, hev_thresh); 1314 mbedge_lim, inner_limit, hev_thresh);
1308 } 1315 }
1309 1316
1310 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { 1317 if (inner_filter) {
1311 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*s->linesize, 1318 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*s->linesize,
1312 s->linesize, bedge_lim, 1319 s->linesize, bedge_lim,
1313 inner_limit, hev_thresh); 1320 inner_limit, hev_thresh);
1314 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*s->linesize, 1321 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*s->linesize,
1315 s->linesize, bedge_lim, 1322 s->linesize, bedge_lim,
1322 s->uvlinesize, bedge_lim, 1329 s->uvlinesize, bedge_lim,
1323 inner_limit, hev_thresh); 1330 inner_limit, hev_thresh);
1324 } 1331 }
1325 } 1332 }
1326 1333
1327 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, VP8FilterStrength *f, int mb_x, int mb_y) 1334 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1328 { 1335 {
1329 int mbedge_lim, bedge_lim; 1336 int mbedge_lim, bedge_lim;
1330 int filter_level = f->filter_level; 1337 int filter_level = f->filter_level;
1331 int inner_limit = f->inner_limit; 1338 int inner_limit = f->inner_limit;
1339 int inner_filter = f->inner_filter;
1332 1340
1333 if (!filter_level) 1341 if (!filter_level)
1334 return; 1342 return;
1335 1343
1336 mbedge_lim = 2*(filter_level+2) + inner_limit; 1344 mbedge_lim = 2*(filter_level+2) + inner_limit;
1337 bedge_lim = 2* filter_level + inner_limit; 1345 bedge_lim = 2* filter_level + inner_limit;
1338 1346
1339 if (mb_x) 1347 if (mb_x)
1340 s->vp8dsp.vp8_h_loop_filter_simple(dst, s->linesize, mbedge_lim); 1348 s->vp8dsp.vp8_h_loop_filter_simple(dst, s->linesize, mbedge_lim);
1341 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { 1349 if (inner_filter) {
1342 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, s->linesize, bedge_lim); 1350 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, s->linesize, bedge_lim);
1343 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, s->linesize, bedge_lim); 1351 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, s->linesize, bedge_lim);
1344 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, s->linesize, bedge_lim); 1352 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, s->linesize, bedge_lim);
1345 } 1353 }
1346 1354
1347 if (mb_y) 1355 if (mb_y)
1348 s->vp8dsp.vp8_v_loop_filter_simple(dst, s->linesize, mbedge_lim); 1356 s->vp8dsp.vp8_v_loop_filter_simple(dst, s->linesize, mbedge_lim);
1349 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { 1357 if (inner_filter) {
1350 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*s->linesize, s->linesize, bedge_lim); 1358 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*s->linesize, s->linesize, bedge_lim);
1351 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*s->linesize, s->linesize, bedge_lim); 1359 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*s->linesize, s->linesize, bedge_lim);
1352 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*s->linesize, s->linesize, bedge_lim); 1360 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*s->linesize, s->linesize, bedge_lim);
1353 } 1361 }
1354 } 1362 }
1355 1363
1356 static void filter_mb_row(VP8Context *s, int mb_y) 1364 static void filter_mb_row(VP8Context *s, int mb_y)
1357 { 1365 {
1358 VP8FilterStrength *f = s->filter_strength; 1366 VP8FilterStrength *f = s->filter_strength;
1359 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
1360 uint8_t *dst[3] = { 1367 uint8_t *dst[3] = {
1361 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize, 1368 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize,
1362 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize, 1369 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize,
1363 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize 1370 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize
1364 }; 1371 };
1365 int mb_x; 1372 int mb_x;
1366 1373
1367 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { 1374 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1368 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); 1375 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1369 filter_mb(s, dst, mb++, f++, mb_x, mb_y); 1376 filter_mb(s, dst, f++, mb_x, mb_y);
1370 dst[0] += 16; 1377 dst[0] += 16;
1371 dst[1] += 8; 1378 dst[1] += 8;
1372 dst[2] += 8; 1379 dst[2] += 8;
1373 } 1380 }
1374 } 1381 }
1375 1382
1376 static void filter_mb_row_simple(VP8Context *s, int mb_y) 1383 static void filter_mb_row_simple(VP8Context *s, int mb_y)
1377 { 1384 {
1378 VP8FilterStrength *f = s->filter_strength; 1385 VP8FilterStrength *f = s->filter_strength;
1379 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
1380 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize; 1386 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
1381 int mb_x; 1387 int mb_x;
1382 1388
1383 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { 1389 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1384 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1); 1390 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
1385 filter_mb_simple(s, dst, mb++, f++, mb_x, mb_y); 1391 filter_mb_simple(s, dst, f++, mb_x, mb_y);
1386 dst += 16; 1392 dst += 16;
1387 } 1393 }
1388 } 1394 }
1389 1395
1390 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, 1396 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1444 if (!s->edge_emu_buffer) 1450 if (!s->edge_emu_buffer)
1445 s->edge_emu_buffer = av_malloc(21*s->linesize); 1451 s->edge_emu_buffer = av_malloc(21*s->linesize);
1446 1452
1447 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz)); 1453 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1448 1454
1455 /* Zero macroblock structures for top/left prediction from outside the frame. */
1456 memset(s->macroblocks, 0, (s->mb_width + s->mb_height*2)*sizeof(*s->macroblocks));
1457
1449 // top edge of 127 for intra prediction 1458 // top edge of 127 for intra prediction
1450 memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border)); 1459 memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border));
1451 1460
1452 for (mb_y = 0; mb_y < s->mb_height; mb_y++) { 1461 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1453 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; 1462 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1454 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride; 1463 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1455 uint8_t *intra4x4 = s->intra4x4_pred_mode + 4*mb_y*s->b4_stride; 1464 uint8_t *intra4x4 = s->intra4x4_pred_mode + 4*mb_y*s->b4_stride;
1465 uint8_t *segment_map = s->segmentation_map + mb_y*s->mb_stride;
1456 uint8_t *dst[3] = { 1466 uint8_t *dst[3] = {
1457 curframe->data[0] + 16*mb_y*s->linesize, 1467 curframe->data[0] + 16*mb_y*s->linesize,
1458 curframe->data[1] + 8*mb_y*s->uvlinesize, 1468 curframe->data[1] + 8*mb_y*s->uvlinesize,
1459 curframe->data[2] + 8*mb_y*s->uvlinesize 1469 curframe->data[2] + 8*mb_y*s->uvlinesize
1460 }; 1470 };
1469 if (mb_y) 1479 if (mb_y)
1470 memset(s->top_border, 129, sizeof(*s->top_border)); 1480 memset(s->top_border, 129, sizeof(*s->top_border));
1471 1481
1472 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { 1482 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1473 uint8_t *intra4x4_mb = s->keyframe ? intra4x4 + 4*mb_x : s->intra4x4_pred_mode_mb; 1483 uint8_t *intra4x4_mb = s->keyframe ? intra4x4 + 4*mb_x : s->intra4x4_pred_mode_mb;
1484 uint8_t *segment_mb = segment_map+mb_x;
1474 1485
1475 /* Prefetch the current frame, 4 MBs ahead */ 1486 /* Prefetch the current frame, 4 MBs ahead */
1476 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); 1487 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1477 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); 1488 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1478 1489
1479 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4_mb); 1490 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4_mb, segment_mb);
1480 1491
1481 if (!mb->skip) 1492 if (!mb->skip)
1482 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); 1493 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
1483 else { 1494 else {
1484 AV_ZERO128(s->non_zero_count_cache); // luma 1495 AV_ZERO128(s->non_zero_count_cache); // luma