comparison vp8.c @ 12194:80b142c2e9f7 libavcodec

Change the function prototypes for the width=8 inner and mbedge loop filter functions so that they handle both the U and V planes at the same time. This will have speed advantages when using SSE2 (or higher) optimizations, since we can do both the U and V rows together in a single xmm register. This also renames filter16 to filter16y and filter8 to filter8uv so that it's more obvious what each function is used for.
author rbultje
date Mon, 19 Jul 2010 21:18:04 +0000
parents 6f0db2eeaf70
children b768afb88d1a
comparison
equal deleted inserted replaced
12193:0a63bed2a00e 12194:80b142c2e9f7
1243 1243
1244 mbedge_lim = 2*(filter_level+2) + inner_limit; 1244 mbedge_lim = 2*(filter_level+2) + inner_limit;
1245 bedge_lim = 2* filter_level + inner_limit; 1245 bedge_lim = 2* filter_level + inner_limit;
1246 1246
1247 if (mb_x) { 1247 if (mb_x) {
1248 s->vp8dsp.vp8_h_loop_filter16(dst[0], s->linesize, mbedge_lim, inner_limit, hev_thresh); 1248 s->vp8dsp.vp8_h_loop_filter16y(dst[0], s->linesize,
1249 s->vp8dsp.vp8_h_loop_filter8 (dst[1], s->uvlinesize, mbedge_lim, inner_limit, hev_thresh); 1249 mbedge_lim, inner_limit, hev_thresh);
1250 s->vp8dsp.vp8_h_loop_filter8 (dst[2], s->uvlinesize, mbedge_lim, inner_limit, hev_thresh); 1250 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], s->uvlinesize,
1251 mbedge_lim, inner_limit, hev_thresh);
1251 } 1252 }
1252 1253
1253 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { 1254 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) {
1254 s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+ 4, s->linesize, bedge_lim, inner_limit, hev_thresh); 1255 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, s->linesize, bedge_lim,
1255 s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+ 8, s->linesize, bedge_lim, inner_limit, hev_thresh); 1256 inner_limit, hev_thresh);
1256 s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+12, s->linesize, bedge_lim, inner_limit, hev_thresh); 1257 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, s->linesize, bedge_lim,
1257 s->vp8dsp.vp8_h_loop_filter8_inner (dst[1]+ 4, s->uvlinesize, bedge_lim, inner_limit, hev_thresh); 1258 inner_limit, hev_thresh);
1258 s->vp8dsp.vp8_h_loop_filter8_inner (dst[2]+ 4, s->uvlinesize, bedge_lim, inner_limit, hev_thresh); 1259 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, s->linesize, bedge_lim,
1260 inner_limit, hev_thresh);
1261 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1262 s->uvlinesize, bedge_lim,
1263 inner_limit, hev_thresh);
1259 } 1264 }
1260 1265
1261 if (mb_y) { 1266 if (mb_y) {
1262 s->vp8dsp.vp8_v_loop_filter16(dst[0], s->linesize, mbedge_lim, inner_limit, hev_thresh); 1267 s->vp8dsp.vp8_v_loop_filter16y(dst[0], s->linesize,
1263 s->vp8dsp.vp8_v_loop_filter8 (dst[1], s->uvlinesize, mbedge_lim, inner_limit, hev_thresh); 1268 mbedge_lim, inner_limit, hev_thresh);
1264 s->vp8dsp.vp8_v_loop_filter8 (dst[2], s->uvlinesize, mbedge_lim, inner_limit, hev_thresh); 1269 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], s->uvlinesize,
1270 mbedge_lim, inner_limit, hev_thresh);
1265 } 1271 }
1266 1272
1267 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { 1273 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) {
1268 s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+ 4*s->linesize, s->linesize, bedge_lim, inner_limit, hev_thresh); 1274 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*s->linesize,
1269 s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+ 8*s->linesize, s->linesize, bedge_lim, inner_limit, hev_thresh); 1275 s->linesize, bedge_lim,
1270 s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+12*s->linesize, s->linesize, bedge_lim, inner_limit, hev_thresh); 1276 inner_limit, hev_thresh);
1271 s->vp8dsp.vp8_v_loop_filter8_inner (dst[1]+ 4*s->uvlinesize, s->uvlinesize, bedge_lim, inner_limit, hev_thresh); 1277 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*s->linesize,
1272 s->vp8dsp.vp8_v_loop_filter8_inner (dst[2]+ 4*s->uvlinesize, s->uvlinesize, bedge_lim, inner_limit, hev_thresh); 1278 s->linesize, bedge_lim,
1279 inner_limit, hev_thresh);
1280 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*s->linesize,
1281 s->linesize, bedge_lim,
1282 inner_limit, hev_thresh);
1283 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * s->uvlinesize,
1284 dst[2] + 4 * s->uvlinesize,
1285 s->uvlinesize, bedge_lim,
1286 inner_limit, hev_thresh);
1273 } 1287 }
1274 } 1288 }
1275 1289
1276 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, int mb_x, int mb_y) 1290 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, int mb_x, int mb_y)
1277 { 1291 {