Mercurial > libavcodec.hg
comparison vp8.c @ 12222:7acdbfd2a222 libavcodec
Calculate deblock strength per-MB instead of per-row
Gives better cache locality, since the VP8Macroblock structs are still in cache.
Inspired by the way x264 does it.
author | darkshikari |
---|---|
date | Thu, 22 Jul 2010 07:24:22 +0000 |
parents | 45852dac8338 |
children | 93e27a5401de |
comparison legend:
equal
deleted
inserted
replaced
12221:45852dac8338 | 12222:7acdbfd2a222 |
---|---|
27 #include "vp8dsp.h" | 27 #include "vp8dsp.h" |
28 #include "h264pred.h" | 28 #include "h264pred.h" |
29 #include "rectangle.h" | 29 #include "rectangle.h" |
30 | 30 |
31 typedef struct { | 31 typedef struct { |
32 uint8_t filter_level; | |
33 uint8_t inner_limit; | |
34 } VP8FilterStrength; | |
35 | |
36 typedef struct { | |
32 uint8_t segment; | 37 uint8_t segment; |
33 uint8_t skip; | 38 uint8_t skip; |
34 // todo: make it possible to check for at least (i4x4 or split_mv) | 39 // todo: make it possible to check for at least (i4x4 or split_mv) |
35 // in one op. are others needed? | 40 // in one op. are others needed? |
36 uint8_t mode; | 41 uint8_t mode; |
77 int num_coeff_partitions; | 82 int num_coeff_partitions; |
78 VP56RangeCoder coeff_partition[8]; | 83 VP56RangeCoder coeff_partition[8]; |
79 | 84 |
80 VP8Macroblock *macroblocks; | 85 VP8Macroblock *macroblocks; |
81 VP8Macroblock *macroblocks_base; | 86 VP8Macroblock *macroblocks_base; |
87 VP8FilterStrength *filter_strength; | |
82 int mb_stride; | 88 int mb_stride; |
83 | 89 |
84 uint8_t *intra4x4_pred_mode; | 90 uint8_t *intra4x4_pred_mode; |
85 uint8_t *intra4x4_pred_mode_base; | 91 uint8_t *intra4x4_pred_mode_base; |
86 int b4_stride; | 92 int b4_stride; |
229 // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle | 235 // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle |
230 s->mb_stride = s->mb_width+1; | 236 s->mb_stride = s->mb_width+1; |
231 s->b4_stride = 4*s->mb_stride; | 237 s->b4_stride = 4*s->mb_stride; |
232 | 238 |
233 s->macroblocks_base = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks)); | 239 s->macroblocks_base = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks)); |
240 s->filter_strength = av_mallocz(s->mb_stride*sizeof(*s->filter_strength)); | |
234 s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1)); | 241 s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1)); |
235 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); | 242 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); |
236 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); | 243 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); |
237 | 244 |
238 if (!s->macroblocks_base || !s->intra4x4_pred_mode_base || !s->top_nnz || !s->top_border) | 245 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_base || !s->top_nnz || !s->top_border) |
239 return AVERROR(ENOMEM); | 246 return AVERROR(ENOMEM); |
240 | 247 |
241 s->macroblocks = s->macroblocks_base + 1 + s->mb_stride; | 248 s->macroblocks = s->macroblocks_base + 1 + s->mb_stride; |
242 s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride; | 249 s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride; |
243 | 250 |
1210 u_dst += 4*s->uvlinesize; | 1217 u_dst += 4*s->uvlinesize; |
1211 v_dst += 4*s->uvlinesize; | 1218 v_dst += 4*s->uvlinesize; |
1212 } | 1219 } |
1213 } | 1220 } |
1214 | 1221 |
1215 static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, int *level, int *inner, int *hev_thresh) | 1222 static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) |
1216 { | 1223 { |
1217 int interior_limit, filter_level; | 1224 int interior_limit, filter_level; |
1218 | 1225 |
1219 if (s->segmentation.enabled) { | 1226 if (s->segmentation.enabled) { |
1220 filter_level = s->segmentation.filter_level[mb->segment]; | 1227 filter_level = s->segmentation.filter_level[mb->segment]; |
1245 interior_limit >>= s->filter.sharpness > 4 ? 2 : 1; | 1252 interior_limit >>= s->filter.sharpness > 4 ? 2 : 1; |
1246 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); | 1253 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); |
1247 } | 1254 } |
1248 interior_limit = FFMAX(interior_limit, 1); | 1255 interior_limit = FFMAX(interior_limit, 1); |
1249 | 1256 |
1250 *level = filter_level; | 1257 f->filter_level = filter_level; |
1251 *inner = interior_limit; | 1258 f->inner_limit = interior_limit; |
1252 | 1259 } |
1253 if (hev_thresh) { | 1260 |
1254 *hev_thresh = filter_level >= 15; | 1261 static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, VP8FilterStrength *f, int mb_x, int mb_y) |
1255 | 1262 { |
1256 if (s->keyframe) { | 1263 int mbedge_lim, bedge_lim, hev_thresh; |
1257 if (filter_level >= 40) | 1264 int filter_level = f->filter_level; |
1258 *hev_thresh = 2; | 1265 int inner_limit = f->inner_limit; |
1259 } else { | 1266 |
1260 if (filter_level >= 40) | |
1261 *hev_thresh = 3; | |
1262 else if (filter_level >= 20) | |
1263 *hev_thresh = 2; | |
1264 } | |
1265 } | |
1266 } | |
1267 | |
1268 static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, int mb_x, int mb_y) | |
1269 { | |
1270 int filter_level, inner_limit, hev_thresh, mbedge_lim, bedge_lim; | |
1271 | |
1272 filter_level_for_mb(s, mb, &filter_level, &inner_limit, &hev_thresh); | |
1273 if (!filter_level) | 1267 if (!filter_level) |
1274 return; | 1268 return; |
1275 | 1269 |
1276 mbedge_lim = 2*(filter_level+2) + inner_limit; | 1270 mbedge_lim = 2*(filter_level+2) + inner_limit; |
1277 bedge_lim = 2* filter_level + inner_limit; | 1271 bedge_lim = 2* filter_level + inner_limit; |
1272 hev_thresh = filter_level >= 15; | |
1273 | |
1274 if (s->keyframe) { | |
1275 if (filter_level >= 40) | |
1276 hev_thresh = 2; | |
1277 } else { | |
1278 if (filter_level >= 40) | |
1279 hev_thresh = 3; | |
1280 else if (filter_level >= 20) | |
1281 hev_thresh = 2; | |
1282 } | |
1278 | 1283 |
1279 if (mb_x) { | 1284 if (mb_x) { |
1280 s->vp8dsp.vp8_h_loop_filter16y(dst[0], s->linesize, | 1285 s->vp8dsp.vp8_h_loop_filter16y(dst[0], s->linesize, |
1281 mbedge_lim, inner_limit, hev_thresh); | 1286 mbedge_lim, inner_limit, hev_thresh); |
1282 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], s->uvlinesize, | 1287 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], s->uvlinesize, |
1317 s->uvlinesize, bedge_lim, | 1322 s->uvlinesize, bedge_lim, |
1318 inner_limit, hev_thresh); | 1323 inner_limit, hev_thresh); |
1319 } | 1324 } |
1320 } | 1325 } |
1321 | 1326 |
1322 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, int mb_x, int mb_y) | 1327 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, VP8FilterStrength *f, int mb_x, int mb_y) |
1323 { | 1328 { |
1324 int filter_level, inner_limit, mbedge_lim, bedge_lim; | 1329 int mbedge_lim, bedge_lim; |
1325 | 1330 int filter_level = f->filter_level; |
1326 filter_level_for_mb(s, mb, &filter_level, &inner_limit, NULL); | 1331 int inner_limit = f->inner_limit; |
1332 | |
1327 if (!filter_level) | 1333 if (!filter_level) |
1328 return; | 1334 return; |
1329 | 1335 |
1330 mbedge_lim = 2*(filter_level+2) + inner_limit; | 1336 mbedge_lim = 2*(filter_level+2) + inner_limit; |
1331 bedge_lim = 2* filter_level + inner_limit; | 1337 bedge_lim = 2* filter_level + inner_limit; |
1347 } | 1353 } |
1348 } | 1354 } |
1349 | 1355 |
1350 static void filter_mb_row(VP8Context *s, int mb_y) | 1356 static void filter_mb_row(VP8Context *s, int mb_y) |
1351 { | 1357 { |
1358 VP8FilterStrength *f = s->filter_strength; | |
1352 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride; | 1359 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride; |
1353 uint8_t *dst[3] = { | 1360 uint8_t *dst[3] = { |
1354 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize, | 1361 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize, |
1355 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize, | 1362 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize, |
1356 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize | 1363 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize |
1357 }; | 1364 }; |
1358 int mb_x; | 1365 int mb_x; |
1359 | 1366 |
1360 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | 1367 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { |
1361 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); | 1368 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); |
1362 filter_mb(s, dst, mb++, mb_x, mb_y); | 1369 filter_mb(s, dst, mb++, f++, mb_x, mb_y); |
1363 dst[0] += 16; | 1370 dst[0] += 16; |
1364 dst[1] += 8; | 1371 dst[1] += 8; |
1365 dst[2] += 8; | 1372 dst[2] += 8; |
1366 } | 1373 } |
1367 } | 1374 } |
1368 | 1375 |
1369 static void filter_mb_row_simple(VP8Context *s, int mb_y) | 1376 static void filter_mb_row_simple(VP8Context *s, int mb_y) |
1370 { | 1377 { |
1378 VP8FilterStrength *f = s->filter_strength; | |
1379 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride; | |
1371 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize; | 1380 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize; |
1372 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride; | |
1373 int mb_x; | 1381 int mb_x; |
1374 | 1382 |
1375 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | 1383 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { |
1376 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1); | 1384 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1); |
1377 filter_mb_simple(s, dst, mb++, mb_x, mb_y); | 1385 filter_mb_simple(s, dst, mb++, f++, mb_x, mb_y); |
1378 dst += 16; | 1386 dst += 16; |
1379 } | 1387 } |
1380 } | 1388 } |
1381 | 1389 |
1382 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | 1390 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, |
1494 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { | 1502 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { |
1495 s->left_nnz[8] = 0; | 1503 s->left_nnz[8] = 0; |
1496 s->top_nnz[mb_x][8] = 0; | 1504 s->top_nnz[mb_x][8] = 0; |
1497 } | 1505 } |
1498 } | 1506 } |
1507 | |
1508 if (s->deblock_filter) | |
1509 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]); | |
1499 | 1510 |
1500 dst[0] += 16; | 1511 dst[0] += 16; |
1501 dst[1] += 8; | 1512 dst[1] += 8; |
1502 dst[2] += 8; | 1513 dst[2] += 8; |
1503 mb++; | 1514 mb++; |