Mercurial > libavcodec.hg
comparison x86/vp8dsp-init.c @ 12194:80b142c2e9f7 libavcodec
Change function prototypes for width=8 inner and mbedge loopfilter functions
so that they do both U and V planes at the same time. This will have speed
advantages when using SSE2 (or higher) optimizations, since we can do both
the U and V rows together in a single xmm register.
This also renames filter16 to filter16y and filter8 to filter8uv so that it's
more obvious what each function is used for.
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 21:18:04 +0000 |
parents | b246b214c2e9 |
children | 552c7c10bc73 |
comparison
equal
deleted
inserted
replaced
12193:0a63bed2a00e | 12194:80b142c2e9f7 |
---|---|
228 extern void ff_vp8_v_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim); | 228 extern void ff_vp8_v_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim); |
229 extern void ff_vp8_h_loop_filter_simple_mmx (uint8_t *dst, int stride, int flim); | 229 extern void ff_vp8_h_loop_filter_simple_mmx (uint8_t *dst, int stride, int flim); |
230 extern void ff_vp8_h_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim); | 230 extern void ff_vp8_h_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim); |
231 extern void ff_vp8_h_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim); | 231 extern void ff_vp8_h_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim); |
232 | 232 |
233 extern void ff_vp8_v_loop_filter16_inner_mmx (uint8_t *dst, int stride, | 233 extern void ff_vp8_v_loop_filter16y_inner_mmx (uint8_t *dst, int stride, |
234 int e, int i, int hvt); | 234 int e, int i, int hvt); |
235 extern void ff_vp8_v_loop_filter16_inner_mmxext(uint8_t *dst, int stride, | 235 extern void ff_vp8_v_loop_filter16y_inner_mmxext(uint8_t *dst, int stride, |
236 int e, int i, int hvt); | 236 int e, int i, int hvt); |
237 extern void ff_vp8_v_loop_filter16_inner_sse2 (uint8_t *dst, int stride, | 237 extern void ff_vp8_v_loop_filter16y_inner_sse2 (uint8_t *dst, int stride, |
238 int e, int i, int hvt); | 238 int e, int i, int hvt); |
239 extern void ff_vp8_h_loop_filter16_inner_mmx (uint8_t *dst, int stride, | 239 extern void ff_vp8_h_loop_filter16y_inner_mmx (uint8_t *dst, int stride, |
240 int e, int i, int hvt); | 240 int e, int i, int hvt); |
241 extern void ff_vp8_h_loop_filter16_inner_mmxext(uint8_t *dst, int stride, | 241 extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride, |
242 int e, int i, int hvt); | 242 int e, int i, int hvt); |
243 extern void ff_vp8_h_loop_filter16_inner_sse2 (uint8_t *dst, int stride, | 243 extern void ff_vp8_h_loop_filter16y_inner_sse2 (uint8_t *dst, int stride, |
244 int e, int i, int hvt); | 244 int e, int i, int hvt); |
245 #endif | 245 #endif |
246 | 246 |
247 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ | 247 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ |
248 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ | 248 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ |
249 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ | 249 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ |
282 c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx; | 282 c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx; |
283 | 283 |
284 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; | 284 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; |
285 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; | 285 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; |
286 | 286 |
287 c->vp8_v_loop_filter16_inner = ff_vp8_v_loop_filter16_inner_mmx; | 287 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; |
288 c->vp8_h_loop_filter16_inner = ff_vp8_h_loop_filter16_inner_mmx; | 288 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; |
289 } | 289 } |
290 | 290 |
291 /* note that 4-tap width=16 functions are missing because w=16 | 291 /* note that 4-tap width=16 functions are missing because w=16 |
292 * is only used for luma, and luma is always a copy or sixtap. */ | 292 * is only used for luma, and luma is always a copy or sixtap. */ |
293 if (mm_flags & FF_MM_MMX2) { | 293 if (mm_flags & FF_MM_MMX2) { |
300 VP8_BILINEAR_MC_FUNC(2, 4, mmxext); | 300 VP8_BILINEAR_MC_FUNC(2, 4, mmxext); |
301 | 301 |
302 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; | 302 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; |
303 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; | 303 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; |
304 | 304 |
305 c->vp8_v_loop_filter16_inner = ff_vp8_v_loop_filter16_inner_mmxext; | 305 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; |
306 c->vp8_h_loop_filter16_inner = ff_vp8_h_loop_filter16_inner_mmxext; | 306 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; |
307 } | 307 } |
308 | 308 |
309 if (mm_flags & FF_MM_SSE) { | 309 if (mm_flags & FF_MM_SSE) { |
310 c->put_vp8_epel_pixels_tab[0][0][0] = | 310 c->put_vp8_epel_pixels_tab[0][0][0] = |
311 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; | 311 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; |
318 VP8_BILINEAR_MC_FUNC(1, 8, sse2); | 318 VP8_BILINEAR_MC_FUNC(1, 8, sse2); |
319 | 319 |
320 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; | 320 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; |
321 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; | 321 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; |
322 | 322 |
323 c->vp8_v_loop_filter16_inner = ff_vp8_v_loop_filter16_inner_sse2; | 323 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; |
324 c->vp8_h_loop_filter16_inner = ff_vp8_h_loop_filter16_inner_sse2; | 324 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; |
325 } | 325 } |
326 | 326 |
327 if (mm_flags & FF_MM_SSSE3) { | 327 if (mm_flags & FF_MM_SSSE3) { |
328 VP8_LUMA_MC_FUNC(0, 16, ssse3); | 328 VP8_LUMA_MC_FUNC(0, 16, ssse3); |
329 VP8_MC_FUNC(1, 8, ssse3); | 329 VP8_MC_FUNC(1, 8, ssse3); |