comparison vp8.c @ 12222:7acdbfd2a222 libavcodec

Calculate deblock strength per-MB instead of per-row. Gives better cache locality, since the VP8Macroblock structs are still in cache. Inspired by the way x264 does it.
author darkshikari
date Thu, 22 Jul 2010 07:24:22 +0000
parents 45852dac8338
children 93e27a5401de
comparison
equal deleted inserted replaced
12221:45852dac8338 12222:7acdbfd2a222
27 #include "vp8dsp.h" 27 #include "vp8dsp.h"
28 #include "h264pred.h" 28 #include "h264pred.h"
29 #include "rectangle.h" 29 #include "rectangle.h"
30 30
31 typedef struct { 31 typedef struct {
32 uint8_t filter_level;
33 uint8_t inner_limit;
34 } VP8FilterStrength;
35
36 typedef struct {
32 uint8_t segment; 37 uint8_t segment;
33 uint8_t skip; 38 uint8_t skip;
34 // todo: make it possible to check for at least (i4x4 or split_mv) 39 // todo: make it possible to check for at least (i4x4 or split_mv)
35 // in one op. are others needed? 40 // in one op. are others needed?
36 uint8_t mode; 41 uint8_t mode;
77 int num_coeff_partitions; 82 int num_coeff_partitions;
78 VP56RangeCoder coeff_partition[8]; 83 VP56RangeCoder coeff_partition[8];
79 84
80 VP8Macroblock *macroblocks; 85 VP8Macroblock *macroblocks;
81 VP8Macroblock *macroblocks_base; 86 VP8Macroblock *macroblocks_base;
87 VP8FilterStrength *filter_strength;
82 int mb_stride; 88 int mb_stride;
83 89
84 uint8_t *intra4x4_pred_mode; 90 uint8_t *intra4x4_pred_mode;
85 uint8_t *intra4x4_pred_mode_base; 91 uint8_t *intra4x4_pred_mode_base;
86 int b4_stride; 92 int b4_stride;
229 // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle 235 // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle
230 s->mb_stride = s->mb_width+1; 236 s->mb_stride = s->mb_width+1;
231 s->b4_stride = 4*s->mb_stride; 237 s->b4_stride = 4*s->mb_stride;
232 238
233 s->macroblocks_base = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks)); 239 s->macroblocks_base = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks));
240 s->filter_strength = av_mallocz(s->mb_stride*sizeof(*s->filter_strength));
234 s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1)); 241 s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1));
235 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); 242 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
236 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); 243 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
237 244
238 if (!s->macroblocks_base || !s->intra4x4_pred_mode_base || !s->top_nnz || !s->top_border) 245 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_base || !s->top_nnz || !s->top_border)
239 return AVERROR(ENOMEM); 246 return AVERROR(ENOMEM);
240 247
241 s->macroblocks = s->macroblocks_base + 1 + s->mb_stride; 248 s->macroblocks = s->macroblocks_base + 1 + s->mb_stride;
242 s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride; 249 s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride;
243 250
1210 u_dst += 4*s->uvlinesize; 1217 u_dst += 4*s->uvlinesize;
1211 v_dst += 4*s->uvlinesize; 1218 v_dst += 4*s->uvlinesize;
1212 } 1219 }
1213 } 1220 }
1214 1221
1215 static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, int *level, int *inner, int *hev_thresh) 1222 static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1216 { 1223 {
1217 int interior_limit, filter_level; 1224 int interior_limit, filter_level;
1218 1225
1219 if (s->segmentation.enabled) { 1226 if (s->segmentation.enabled) {
1220 filter_level = s->segmentation.filter_level[mb->segment]; 1227 filter_level = s->segmentation.filter_level[mb->segment];
1245 interior_limit >>= s->filter.sharpness > 4 ? 2 : 1; 1252 interior_limit >>= s->filter.sharpness > 4 ? 2 : 1;
1246 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); 1253 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1247 } 1254 }
1248 interior_limit = FFMAX(interior_limit, 1); 1255 interior_limit = FFMAX(interior_limit, 1);
1249 1256
1250 *level = filter_level; 1257 f->filter_level = filter_level;
1251 *inner = interior_limit; 1258 f->inner_limit = interior_limit;
1252 1259 }
1253 if (hev_thresh) { 1260
1254 *hev_thresh = filter_level >= 15; 1261 static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, VP8FilterStrength *f, int mb_x, int mb_y)
1255 1262 {
1256 if (s->keyframe) { 1263 int mbedge_lim, bedge_lim, hev_thresh;
1257 if (filter_level >= 40) 1264 int filter_level = f->filter_level;
1258 *hev_thresh = 2; 1265 int inner_limit = f->inner_limit;
1259 } else { 1266
1260 if (filter_level >= 40)
1261 *hev_thresh = 3;
1262 else if (filter_level >= 20)
1263 *hev_thresh = 2;
1264 }
1265 }
1266 }
1267
1268 static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, int mb_x, int mb_y)
1269 {
1270 int filter_level, inner_limit, hev_thresh, mbedge_lim, bedge_lim;
1271
1272 filter_level_for_mb(s, mb, &filter_level, &inner_limit, &hev_thresh);
1273 if (!filter_level) 1267 if (!filter_level)
1274 return; 1268 return;
1275 1269
1276 mbedge_lim = 2*(filter_level+2) + inner_limit; 1270 mbedge_lim = 2*(filter_level+2) + inner_limit;
1277 bedge_lim = 2* filter_level + inner_limit; 1271 bedge_lim = 2* filter_level + inner_limit;
1272 hev_thresh = filter_level >= 15;
1273
1274 if (s->keyframe) {
1275 if (filter_level >= 40)
1276 hev_thresh = 2;
1277 } else {
1278 if (filter_level >= 40)
1279 hev_thresh = 3;
1280 else if (filter_level >= 20)
1281 hev_thresh = 2;
1282 }
1278 1283
1279 if (mb_x) { 1284 if (mb_x) {
1280 s->vp8dsp.vp8_h_loop_filter16y(dst[0], s->linesize, 1285 s->vp8dsp.vp8_h_loop_filter16y(dst[0], s->linesize,
1281 mbedge_lim, inner_limit, hev_thresh); 1286 mbedge_lim, inner_limit, hev_thresh);
1282 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], s->uvlinesize, 1287 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], s->uvlinesize,
1317 s->uvlinesize, bedge_lim, 1322 s->uvlinesize, bedge_lim,
1318 inner_limit, hev_thresh); 1323 inner_limit, hev_thresh);
1319 } 1324 }
1320 } 1325 }
1321 1326
1322 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, int mb_x, int mb_y) 1327 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, VP8FilterStrength *f, int mb_x, int mb_y)
1323 { 1328 {
1324 int filter_level, inner_limit, mbedge_lim, bedge_lim; 1329 int mbedge_lim, bedge_lim;
1325 1330 int filter_level = f->filter_level;
1326 filter_level_for_mb(s, mb, &filter_level, &inner_limit, NULL); 1331 int inner_limit = f->inner_limit;
1332
1327 if (!filter_level) 1333 if (!filter_level)
1328 return; 1334 return;
1329 1335
1330 mbedge_lim = 2*(filter_level+2) + inner_limit; 1336 mbedge_lim = 2*(filter_level+2) + inner_limit;
1331 bedge_lim = 2* filter_level + inner_limit; 1337 bedge_lim = 2* filter_level + inner_limit;
1347 } 1353 }
1348 } 1354 }
1349 1355
1350 static void filter_mb_row(VP8Context *s, int mb_y) 1356 static void filter_mb_row(VP8Context *s, int mb_y)
1351 { 1357 {
1358 VP8FilterStrength *f = s->filter_strength;
1352 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride; 1359 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
1353 uint8_t *dst[3] = { 1360 uint8_t *dst[3] = {
1354 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize, 1361 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize,
1355 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize, 1362 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize,
1356 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize 1363 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize
1357 }; 1364 };
1358 int mb_x; 1365 int mb_x;
1359 1366
1360 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { 1367 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1361 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); 1368 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1362 filter_mb(s, dst, mb++, mb_x, mb_y); 1369 filter_mb(s, dst, mb++, f++, mb_x, mb_y);
1363 dst[0] += 16; 1370 dst[0] += 16;
1364 dst[1] += 8; 1371 dst[1] += 8;
1365 dst[2] += 8; 1372 dst[2] += 8;
1366 } 1373 }
1367 } 1374 }
1368 1375
1369 static void filter_mb_row_simple(VP8Context *s, int mb_y) 1376 static void filter_mb_row_simple(VP8Context *s, int mb_y)
1370 { 1377 {
1378 VP8FilterStrength *f = s->filter_strength;
1379 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
1371 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize; 1380 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
1372 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
1373 int mb_x; 1381 int mb_x;
1374 1382
1375 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { 1383 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1376 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1); 1384 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
1377 filter_mb_simple(s, dst, mb++, mb_x, mb_y); 1385 filter_mb_simple(s, dst, mb++, f++, mb_x, mb_y);
1378 dst += 16; 1386 dst += 16;
1379 } 1387 }
1380 } 1388 }
1381 1389
1382 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, 1390 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1494 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { 1502 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1495 s->left_nnz[8] = 0; 1503 s->left_nnz[8] = 0;
1496 s->top_nnz[mb_x][8] = 0; 1504 s->top_nnz[mb_x][8] = 0;
1497 } 1505 }
1498 } 1506 }
1507
1508 if (s->deblock_filter)
1509 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1499 1510
1500 dst[0] += 16; 1511 dst[0] += 16;
1501 dst[1] += 8; 1512 dst[1] += 8;
1502 dst[2] += 8; 1513 dst[2] += 8;
1503 mb++; 1514 mb++;