Mercurial > libavcodec.hg
comparison vp8dsp.c @ 12194:80b142c2e9f7 libavcodec
Change function prototypes for width=8 inner and mbedge loopfilter functions
so that they process both the U and V planes at the same time. This will have speed
advantages when using SSE2 (or higher) optimizations, since we can do both
the U and V rows together in a single xmm register.
This also renames filter16 to filter16y and filter8 to filter8uv so that it's
more obvious what each function is used for.
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 21:18:04 +0000 |
parents | 812e23197d64 |
children | e08d65897115 |
comparison
equal
deleted
inserted
replaced
12193:0a63bed2a00e | 12194:80b142c2e9f7 |
---|---|
194 p[ 0*stride] = cm[q0 - a0]; | 194 p[ 0*stride] = cm[q0 - a0]; |
195 p[ 1*stride] = cm[q1 - a1]; | 195 p[ 1*stride] = cm[q1 - a1]; |
196 p[ 2*stride] = cm[q2 - a2]; | 196 p[ 2*stride] = cm[q2 - a2]; |
197 } | 197 } |
198 | 198 |
199 #define LOOP_FILTER(dir, size, stridea, strideb) \ | 199 #define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \ |
200 static void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\ | 200 static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\ |
201 int flim_E, int flim_I, int hev_thresh)\ | 201 int flim_E, int flim_I, int hev_thresh)\ |
202 {\ | 202 {\ |
203 int i;\ | 203 int i;\ |
204 \ | 204 \ |
205 for (i = 0; i < size; i++)\ | 205 for (i = 0; i < size; i++)\ |
209 else\ | 209 else\ |
210 filter_mbedge(dst+i*stridea, strideb);\ | 210 filter_mbedge(dst+i*stridea, strideb);\ |
211 }\ | 211 }\ |
212 }\ | 212 }\ |
213 \ | 213 \ |
214 static void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\ | 214 static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\ |
215 int flim_E, int flim_I, int hev_thresh)\ | 215 int flim_E, int flim_I, int hev_thresh)\ |
216 {\ | 216 {\ |
217 int i;\ | 217 int i;\ |
218 \ | 218 \ |
219 for (i = 0; i < size; i++)\ | 219 for (i = 0; i < size; i++)\ |
224 else \ | 224 else \ |
225 filter_common(dst+i*stridea, strideb, 0);\ | 225 filter_common(dst+i*stridea, strideb, 0);\ |
226 }\ | 226 }\ |
227 } | 227 } |
228 | 228 |
229 LOOP_FILTER(v, 16, 1, stride) | 229 LOOP_FILTER(v, 16, 1, stride,) |
230 LOOP_FILTER(h, 16, stride, 1) | 230 LOOP_FILTER(h, 16, stride, 1,) |
231 LOOP_FILTER(v, 8, 1, stride) | 231 |
232 LOOP_FILTER(h, 8, stride, 1) | 232 #define UV_LOOP_FILTER(dir, stridea, strideb) \ |
233 LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \ | |
234 static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, int stride,\ | |
235 int fE, int fI, int hev_thresh)\ | |
236 {\ | |
237 vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\ | |
238 vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\ | |
239 }\ | |
240 static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, int stride,\ | |
241 int fE, int fI, int hev_thresh)\ | |
242 {\ | |
243 vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\ | |
244 vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\ | |
245 } | |
246 | |
247 UV_LOOP_FILTER(v, 1, stride) | |
248 UV_LOOP_FILTER(h, stride, 1) | |
233 | 249 |
234 static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim) | 250 static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim) |
235 { | 251 { |
236 int i; | 252 int i; |
237 | 253 |
441 { | 457 { |
442 dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; | 458 dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; |
443 dsp->vp8_idct_add = vp8_idct_add_c; | 459 dsp->vp8_idct_add = vp8_idct_add_c; |
444 dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; | 460 dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; |
445 | 461 |
446 dsp->vp8_v_loop_filter16 = vp8_v_loop_filter16_c; | 462 dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c; |
447 dsp->vp8_h_loop_filter16 = vp8_h_loop_filter16_c; | 463 dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c; |
448 dsp->vp8_v_loop_filter8 = vp8_v_loop_filter8_c; | 464 dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c; |
449 dsp->vp8_h_loop_filter8 = vp8_h_loop_filter8_c; | 465 dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c; |
450 | 466 |
451 dsp->vp8_v_loop_filter16_inner = vp8_v_loop_filter16_inner_c; | 467 dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c; |
452 dsp->vp8_h_loop_filter16_inner = vp8_h_loop_filter16_inner_c; | 468 dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c; |
453 dsp->vp8_v_loop_filter8_inner = vp8_v_loop_filter8_inner_c; | 469 dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c; |
454 dsp->vp8_h_loop_filter8_inner = vp8_h_loop_filter8_inner_c; | 470 dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c; |
455 | 471 |
456 dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; | 472 dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; |
457 dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; | 473 dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; |
458 | 474 |
459 VP8_MC_FUNC(0, 16); | 475 VP8_MC_FUNC(0, 16); |