Mercurial > libavcodec.hg
comparison x86/vp8dsp-init.c @ 12205:d38e8565ba05 libavcodec
Add VP8 macroblock-edge (MBedge) loop filter functions in MMX, MMX2 and SSE2 versions, for both luma (width=16)
and chroma (width=8).
author | rbultje |
---|---|
date | Tue, 20 Jul 2010 22:58:56 +0000 |
parents | 563339ea87aa |
children | 9eef00a43280 |
comparison
equal
deleted
inserted
replaced
12204:563339ea87aa | 12205:d38e8565ba05 |
---|---|
253 int s, int e, int i, int hvt); | 253 int s, int e, int i, int hvt); |
254 extern void ff_vp8_h_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV, | 254 extern void ff_vp8_h_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV, |
255 int s, int e, int i, int hvt); | 255 int s, int e, int i, int hvt); |
256 extern void ff_vp8_h_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV, | 256 extern void ff_vp8_h_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV, |
257 int s, int e, int i, int hvt); | 257 int s, int e, int i, int hvt); |
258 | |
259 extern void ff_vp8_v_loop_filter16y_mbedge_mmx (uint8_t *dst, int stride, | |
260 int e, int i, int hvt); | |
261 extern void ff_vp8_v_loop_filter16y_mbedge_mmxext(uint8_t *dst, int stride, | |
262 int e, int i, int hvt); | |
263 extern void ff_vp8_v_loop_filter16y_mbedge_sse2 (uint8_t *dst, int stride, | |
264 int e, int i, int hvt); | |
265 extern void ff_vp8_h_loop_filter16y_mbedge_mmx (uint8_t *dst, int stride, | |
266 int e, int i, int hvt); | |
267 extern void ff_vp8_h_loop_filter16y_mbedge_mmxext(uint8_t *dst, int stride, | |
268 int e, int i, int hvt); | |
269 extern void ff_vp8_h_loop_filter16y_mbedge_sse2 (uint8_t *dst, int stride, | |
270 int e, int i, int hvt); | |
271 | |
272 extern void ff_vp8_v_loop_filter8uv_mbedge_mmx (uint8_t *dstU, uint8_t *dstV, | |
273 int s, int e, int i, int hvt); | |
274 extern void ff_vp8_v_loop_filter8uv_mbedge_mmxext(uint8_t *dstU, uint8_t *dstV, | |
275 int s, int e, int i, int hvt); | |
276 extern void ff_vp8_v_loop_filter8uv_mbedge_sse2 (uint8_t *dstU, uint8_t *dstV, | |
277 int s, int e, int i, int hvt); | |
278 extern void ff_vp8_h_loop_filter8uv_mbedge_mmx (uint8_t *dstU, uint8_t *dstV, | |
279 int s, int e, int i, int hvt); | |
280 extern void ff_vp8_h_loop_filter8uv_mbedge_mmxext(uint8_t *dstU, uint8_t *dstV, | |
281 int s, int e, int i, int hvt); | |
282 extern void ff_vp8_h_loop_filter8uv_mbedge_sse2 (uint8_t *dstU, uint8_t *dstV, | |
283 int s, int e, int i, int hvt); | |
258 #endif | 284 #endif |
259 | 285 |
260 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ | 286 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ |
261 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ | 287 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ |
262 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ | 288 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ |
299 | 325 |
300 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; | 326 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; |
301 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; | 327 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; |
302 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx; | 328 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx; |
303 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx; | 329 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx; |
330 | |
331 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx; | |
332 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx; | |
333 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx; | |
334 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx; | |
304 } | 335 } |
305 | 336 |
306 /* note that 4-tap width=16 functions are missing because w=16 | 337 /* note that 4-tap width=16 functions are missing because w=16 |
307 * is only used for luma, and luma is always a copy or sixtap. */ | 338 * is only used for luma, and luma is always a copy or sixtap. */ |
308 if (mm_flags & FF_MM_MMX2) { | 339 if (mm_flags & FF_MM_MMX2) { |
319 | 350 |
320 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; | 351 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; |
321 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; | 352 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; |
322 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext; | 353 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext; |
323 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext; | 354 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext; |
355 | |
356 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext; | |
357 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext; | |
358 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext; | |
359 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext; | |
324 } | 360 } |
325 | 361 |
326 if (mm_flags & FF_MM_SSE) { | 362 if (mm_flags & FF_MM_SSE) { |
327 c->put_vp8_epel_pixels_tab[0][0][0] = | 363 c->put_vp8_epel_pixels_tab[0][0][0] = |
328 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; | 364 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; |
337 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; | 373 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; |
338 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; | 374 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; |
339 | 375 |
340 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; | 376 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; |
341 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2; | 377 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2; |
378 | |
379 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext; | |
380 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext; | |
342 } | 381 } |
343 | 382 |
344 if (mm_flags & FF_MM_SSE2) { | 383 if (mm_flags & FF_MM_SSE2) { |
345 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; | 384 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; |
346 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2; | 385 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2; |
386 | |
387 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext; | |
388 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext; | |
347 } | 389 } |
348 | 390 |
349 if (mm_flags & FF_MM_SSSE3) { | 391 if (mm_flags & FF_MM_SSSE3) { |
350 VP8_LUMA_MC_FUNC(0, 16, ssse3); | 392 VP8_LUMA_MC_FUNC(0, 16, ssse3); |
351 VP8_MC_FUNC(1, 8, ssse3); | 393 VP8_MC_FUNC(1, 8, ssse3); |