Mercurial > libavcodec.hg
diff vp8dsp.c @ 12194:80b142c2e9f7 libavcodec
Change the function prototypes of the width=8 inner and mbedge loop filter
functions so that each call handles both the U and V planes at the same time.
This will have speed advantages when using SSE2 (or higher) optimizations, since
we can process both the U and V rows together in a single xmm register.
This also renames filter16 to filter16y and filter8 to filter8uv so that it is
more obvious which plane(s) each function is used for (Y, or U and V).
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 21:18:04 +0000 |
parents | 812e23197d64 |
children | e08d65897115 |
line wrap: on
line diff
--- a/vp8dsp.c Mon Jul 19 20:53:58 2010 +0000 +++ b/vp8dsp.c Mon Jul 19 21:18:04 2010 +0000 @@ -196,8 +196,8 @@ p[ 2*stride] = cm[q2 - a2]; } -#define LOOP_FILTER(dir, size, stridea, strideb) \ -static void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\ +#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \ +static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\ int flim_E, int flim_I, int hev_thresh)\ {\ int i;\ @@ -211,7 +211,7 @@ }\ }\ \ -static void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\ +static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\ int flim_E, int flim_I, int hev_thresh)\ {\ int i;\ @@ -226,10 +226,26 @@ }\ } -LOOP_FILTER(v, 16, 1, stride) -LOOP_FILTER(h, 16, stride, 1) -LOOP_FILTER(v, 8, 1, stride) -LOOP_FILTER(h, 8, stride, 1) +LOOP_FILTER(v, 16, 1, stride,) +LOOP_FILTER(h, 16, stride, 1,) + +#define UV_LOOP_FILTER(dir, stridea, strideb) \ +LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \ +static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, int stride,\ + int fE, int fI, int hev_thresh)\ +{\ + vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\ + vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\ +}\ +static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, int stride,\ + int fE, int fI, int hev_thresh)\ +{\ + vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\ + vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\ +} + +UV_LOOP_FILTER(v, 1, stride) +UV_LOOP_FILTER(h, stride, 1) static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim) { @@ -443,15 +459,15 @@ dsp->vp8_idct_add = vp8_idct_add_c; dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; - dsp->vp8_v_loop_filter16 = vp8_v_loop_filter16_c; - dsp->vp8_h_loop_filter16 = vp8_h_loop_filter16_c; - 
dsp->vp8_v_loop_filter8 = vp8_v_loop_filter8_c; - dsp->vp8_h_loop_filter8 = vp8_h_loop_filter8_c; + dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c; + dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c; + dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c; + dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c; - dsp->vp8_v_loop_filter16_inner = vp8_v_loop_filter16_inner_c; - dsp->vp8_h_loop_filter16_inner = vp8_h_loop_filter16_inner_c; - dsp->vp8_v_loop_filter8_inner = vp8_v_loop_filter8_inner_c; - dsp->vp8_h_loop_filter8_inner = vp8_h_loop_filter8_inner_c; + dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c; + dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c; + dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c; + dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c; dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c;