Mercurial > libavcodec.hg
comparison vp8.c @ 12194:80b142c2e9f7 libavcodec
Change the function prototypes of the width=8 inner and mbedge loopfilter
functions so that they process both the U and V planes at the same time. This
will have speed advantages when using SSE2 (or higher) optimizations, since we
can do both the U and V rows together in a single xmm register.
This also renames filter16 to filter16y and filter8 to filter8uv so that it's
more obvious what each function is used for.
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 21:18:04 +0000 |
parents | 6f0db2eeaf70 |
children | b768afb88d1a |
comparison
equal
deleted
inserted
replaced
12193:0a63bed2a00e | 12194:80b142c2e9f7 |
---|---|
1243 | 1243 |
1244 mbedge_lim = 2*(filter_level+2) + inner_limit; | 1244 mbedge_lim = 2*(filter_level+2) + inner_limit; |
1245 bedge_lim = 2* filter_level + inner_limit; | 1245 bedge_lim = 2* filter_level + inner_limit; |
1246 | 1246 |
1247 if (mb_x) { | 1247 if (mb_x) { |
1248 s->vp8dsp.vp8_h_loop_filter16(dst[0], s->linesize, mbedge_lim, inner_limit, hev_thresh); | 1248 s->vp8dsp.vp8_h_loop_filter16y(dst[0], s->linesize, |
1249 s->vp8dsp.vp8_h_loop_filter8 (dst[1], s->uvlinesize, mbedge_lim, inner_limit, hev_thresh); | 1249 mbedge_lim, inner_limit, hev_thresh); |
1250 s->vp8dsp.vp8_h_loop_filter8 (dst[2], s->uvlinesize, mbedge_lim, inner_limit, hev_thresh); | 1250 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], s->uvlinesize, |
| 1251 mbedge_lim, inner_limit, hev_thresh); |
1251 } | 1252 } |
1252 | 1253 |
1253 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { | 1254 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { |
1254 s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+ 4, s->linesize, bedge_lim, inner_limit, hev_thresh); | 1255 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, s->linesize, bedge_lim, |
1255 s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+ 8, s->linesize, bedge_lim, inner_limit, hev_thresh); | 1256 inner_limit, hev_thresh); |
1256 s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+12, s->linesize, bedge_lim, inner_limit, hev_thresh); | 1257 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, s->linesize, bedge_lim, |
1257 s->vp8dsp.vp8_h_loop_filter8_inner (dst[1]+ 4, s->uvlinesize, bedge_lim, inner_limit, hev_thresh); | 1258 inner_limit, hev_thresh); |
1258 s->vp8dsp.vp8_h_loop_filter8_inner (dst[2]+ 4, s->uvlinesize, bedge_lim, inner_limit, hev_thresh); | 1259 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, s->linesize, bedge_lim, |
| 1260 inner_limit, hev_thresh); |
| 1261 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, |
| 1262 s->uvlinesize, bedge_lim, |
| 1263 inner_limit, hev_thresh); |
1259 } | 1264 } |
1260 | 1265 |
1261 if (mb_y) { | 1266 if (mb_y) { |
1262 s->vp8dsp.vp8_v_loop_filter16(dst[0], s->linesize, mbedge_lim, inner_limit, hev_thresh); | 1267 s->vp8dsp.vp8_v_loop_filter16y(dst[0], s->linesize, |
1263 s->vp8dsp.vp8_v_loop_filter8 (dst[1], s->uvlinesize, mbedge_lim, inner_limit, hev_thresh); | 1268 mbedge_lim, inner_limit, hev_thresh); |
1264 s->vp8dsp.vp8_v_loop_filter8 (dst[2], s->uvlinesize, mbedge_lim, inner_limit, hev_thresh); | 1269 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], s->uvlinesize, |
| 1270 mbedge_lim, inner_limit, hev_thresh); |
1265 } | 1271 } |
1266 | 1272 |
1267 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { | 1273 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { |
1268 s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+ 4*s->linesize, s->linesize, bedge_lim, inner_limit, hev_thresh); | 1274 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*s->linesize, |
1269 s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+ 8*s->linesize, s->linesize, bedge_lim, inner_limit, hev_thresh); | 1275 s->linesize, bedge_lim, |
1270 s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+12*s->linesize, s->linesize, bedge_lim, inner_limit, hev_thresh); | 1276 inner_limit, hev_thresh); |
1271 s->vp8dsp.vp8_v_loop_filter8_inner (dst[1]+ 4*s->uvlinesize, s->uvlinesize, bedge_lim, inner_limit, hev_thresh); | 1277 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*s->linesize, |
1272 s->vp8dsp.vp8_v_loop_filter8_inner (dst[2]+ 4*s->uvlinesize, s->uvlinesize, bedge_lim, inner_limit, hev_thresh); | 1278 s->linesize, bedge_lim, |
| 1279 inner_limit, hev_thresh); |
| 1280 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*s->linesize, |
| 1281 s->linesize, bedge_lim, |
| 1282 inner_limit, hev_thresh); |
| 1283 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * s->uvlinesize, |
| 1284 dst[2] + 4 * s->uvlinesize, |
| 1285 s->uvlinesize, bedge_lim, |
| 1286 inner_limit, hev_thresh); |
1273 } | 1287 } |
1274 } | 1288 } |
1275 | 1289 |
1276 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, int mb_x, int mb_y) | 1290 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, int mb_x, int mb_y) |
1277 { | 1291 { |