comparison vp8dsp.c @ 12194:80b142c2e9f7 libavcodec

Change the function prototypes for the width=8 inner and mbedge loopfilter functions so that they process both the U and V planes at the same time. This will have speed advantages when using SSE2 (or higher) optimizations, since we can do both the U and V rows together in a single xmm register. This also renames filter16 to filter16y and filter8 to filter8uv so that it's more obvious what each function is used for.
author rbultje
date Mon, 19 Jul 2010 21:18:04 +0000
parents 812e23197d64
children e08d65897115
comparison
equal deleted inserted replaced
12193:0a63bed2a00e 12194:80b142c2e9f7
194 p[ 0*stride] = cm[q0 - a0]; 194 p[ 0*stride] = cm[q0 - a0];
195 p[ 1*stride] = cm[q1 - a1]; 195 p[ 1*stride] = cm[q1 - a1];
196 p[ 2*stride] = cm[q2 - a2]; 196 p[ 2*stride] = cm[q2 - a2];
197 } 197 }
198 198
199 #define LOOP_FILTER(dir, size, stridea, strideb) \ 199 #define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \
200 static void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\ 200 static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\
201 int flim_E, int flim_I, int hev_thresh)\ 201 int flim_E, int flim_I, int hev_thresh)\
202 {\ 202 {\
203 int i;\ 203 int i;\
204 \ 204 \
205 for (i = 0; i < size; i++)\ 205 for (i = 0; i < size; i++)\
209 else\ 209 else\
210 filter_mbedge(dst+i*stridea, strideb);\ 210 filter_mbedge(dst+i*stridea, strideb);\
211 }\ 211 }\
212 }\ 212 }\
213 \ 213 \
214 static void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\ 214 static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\
215 int flim_E, int flim_I, int hev_thresh)\ 215 int flim_E, int flim_I, int hev_thresh)\
216 {\ 216 {\
217 int i;\ 217 int i;\
218 \ 218 \
219 for (i = 0; i < size; i++)\ 219 for (i = 0; i < size; i++)\
224 else \ 224 else \
225 filter_common(dst+i*stridea, strideb, 0);\ 225 filter_common(dst+i*stridea, strideb, 0);\
226 }\ 226 }\
227 } 227 }
228 228
229 LOOP_FILTER(v, 16, 1, stride) 229 LOOP_FILTER(v, 16, 1, stride,)
230 LOOP_FILTER(h, 16, stride, 1) 230 LOOP_FILTER(h, 16, stride, 1,)
231 LOOP_FILTER(v, 8, 1, stride) 231
232 LOOP_FILTER(h, 8, stride, 1) 232 #define UV_LOOP_FILTER(dir, stridea, strideb) \
233 LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \
234 static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, int stride,\
235 int fE, int fI, int hev_thresh)\
236 {\
237 vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\
238 vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\
239 }\
240 static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, int stride,\
241 int fE, int fI, int hev_thresh)\
242 {\
243 vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\
244 vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\
245 }
246
247 UV_LOOP_FILTER(v, 1, stride)
248 UV_LOOP_FILTER(h, stride, 1)
233 249
234 static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim) 250 static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
235 { 251 {
236 int i; 252 int i;
237 253
441 { 457 {
442 dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; 458 dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c;
443 dsp->vp8_idct_add = vp8_idct_add_c; 459 dsp->vp8_idct_add = vp8_idct_add_c;
444 dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; 460 dsp->vp8_idct_dc_add = vp8_idct_dc_add_c;
445 461
446 dsp->vp8_v_loop_filter16 = vp8_v_loop_filter16_c; 462 dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c;
447 dsp->vp8_h_loop_filter16 = vp8_h_loop_filter16_c; 463 dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c;
448 dsp->vp8_v_loop_filter8 = vp8_v_loop_filter8_c; 464 dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c;
449 dsp->vp8_h_loop_filter8 = vp8_h_loop_filter8_c; 465 dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c;
450 466
451 dsp->vp8_v_loop_filter16_inner = vp8_v_loop_filter16_inner_c; 467 dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c;
452 dsp->vp8_h_loop_filter16_inner = vp8_h_loop_filter16_inner_c; 468 dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c;
453 dsp->vp8_v_loop_filter8_inner = vp8_v_loop_filter8_inner_c; 469 dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c;
454 dsp->vp8_h_loop_filter8_inner = vp8_h_loop_filter8_inner_c; 470 dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c;
455 471
456 dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; 472 dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c;
457 dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; 473 dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c;
458 474
459 VP8_MC_FUNC(0, 16); 475 VP8_MC_FUNC(0, 16);