Mercurial > libavcodec.hg
comparison x86/vp8dsp-init.c @ 12168:b246b214c2e9 libavcodec
VP8 H/V inner loopfilter MMX/MMXEXT/SSE2 optimizations.
author | rbultje |
---|---|
date | Thu, 15 Jul 2010 23:02:34 +0000 |
parents | d780ae746855 |
children | 80b142c2e9f7 |
comparison
equal
deleted
inserted
replaced
12167:69bbfd8f2ba5 | 12168:b246b214c2e9 |
---|---|
227 extern void ff_vp8_v_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim); | 227 extern void ff_vp8_v_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim); |
228 extern void ff_vp8_v_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim); | 228 extern void ff_vp8_v_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim); |
229 extern void ff_vp8_h_loop_filter_simple_mmx (uint8_t *dst, int stride, int flim); | 229 extern void ff_vp8_h_loop_filter_simple_mmx (uint8_t *dst, int stride, int flim); |
230 extern void ff_vp8_h_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim); | 230 extern void ff_vp8_h_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim); |
231 extern void ff_vp8_h_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim); | 231 extern void ff_vp8_h_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim); |
| | 232 |
| | 233 extern void ff_vp8_v_loop_filter16_inner_mmx (uint8_t *dst, int stride, |
| | 234 int e, int i, int hvt); |
| | 235 extern void ff_vp8_v_loop_filter16_inner_mmxext(uint8_t *dst, int stride, |
| | 236 int e, int i, int hvt); |
| | 237 extern void ff_vp8_v_loop_filter16_inner_sse2 (uint8_t *dst, int stride, |
| | 238 int e, int i, int hvt); |
| | 239 extern void ff_vp8_h_loop_filter16_inner_mmx (uint8_t *dst, int stride, |
| | 240 int e, int i, int hvt); |
| | 241 extern void ff_vp8_h_loop_filter16_inner_mmxext(uint8_t *dst, int stride, |
| | 242 int e, int i, int hvt); |
| | 243 extern void ff_vp8_h_loop_filter16_inner_sse2 (uint8_t *dst, int stride, |
| | 244 int e, int i, int hvt); |
232 #endif | 245 #endif |
233 | 246 |
234 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ | 247 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ |
235 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ | 248 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ |
236 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ | 249 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ |
268 c->put_vp8_epel_pixels_tab[1][0][0] = | 281 c->put_vp8_epel_pixels_tab[1][0][0] = |
269 c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx; | 282 c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx; |
270 | 283 |
271 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; | 284 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; |
272 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; | 285 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; |
| | 286 |
| | 287 c->vp8_v_loop_filter16_inner = ff_vp8_v_loop_filter16_inner_mmx; |
| | 288 c->vp8_h_loop_filter16_inner = ff_vp8_h_loop_filter16_inner_mmx; |
273 } | 289 } |
274 | 290 |
275 /* note that 4-tap width=16 functions are missing because w=16 | 291 /* note that 4-tap width=16 functions are missing because w=16 |
276 * is only used for luma, and luma is always a copy or sixtap. */ | 292 * is only used for luma, and luma is always a copy or sixtap. */ |
277 if (mm_flags & FF_MM_MMX2) { | 293 if (mm_flags & FF_MM_MMX2) { |
283 VP8_BILINEAR_MC_FUNC(1, 8, mmxext); | 299 VP8_BILINEAR_MC_FUNC(1, 8, mmxext); |
284 VP8_BILINEAR_MC_FUNC(2, 4, mmxext); | 300 VP8_BILINEAR_MC_FUNC(2, 4, mmxext); |
285 | 301 |
286 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; | 302 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; |
287 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; | 303 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; |
| | 304 |
| | 305 c->vp8_v_loop_filter16_inner = ff_vp8_v_loop_filter16_inner_mmxext; |
| | 306 c->vp8_h_loop_filter16_inner = ff_vp8_h_loop_filter16_inner_mmxext; |
288 } | 307 } |
289 | 308 |
290 if (mm_flags & FF_MM_SSE) { | 309 if (mm_flags & FF_MM_SSE) { |
291 c->put_vp8_epel_pixels_tab[0][0][0] = | 310 c->put_vp8_epel_pixels_tab[0][0][0] = |
292 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; | 311 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; |
298 VP8_BILINEAR_MC_FUNC(0, 16, sse2); | 317 VP8_BILINEAR_MC_FUNC(0, 16, sse2); |
299 VP8_BILINEAR_MC_FUNC(1, 8, sse2); | 318 VP8_BILINEAR_MC_FUNC(1, 8, sse2); |
300 | 319 |
301 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; | 320 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; |
302 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; | 321 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; |
| | 322 |
| | 323 c->vp8_v_loop_filter16_inner = ff_vp8_v_loop_filter16_inner_sse2; |
| | 324 c->vp8_h_loop_filter16_inner = ff_vp8_h_loop_filter16_inner_sse2; |
303 } | 325 } |
304 | 326 |
305 if (mm_flags & FF_MM_SSSE3) { | 327 if (mm_flags & FF_MM_SSSE3) { |
306 VP8_LUMA_MC_FUNC(0, 16, ssse3); | 328 VP8_LUMA_MC_FUNC(0, 16, ssse3); |
307 VP8_MC_FUNC(1, 8, ssse3); | 329 VP8_MC_FUNC(1, 8, ssse3); |