comparison x86/vp8dsp-init.c @ 12205:d38e8565ba05 libavcodec

VP8 MBedge loopfilter MMX/MMX2/SSE2 functions for both luma (width=16) and chroma (width=8).
author rbultje
date Tue, 20 Jul 2010 22:58:56 +0000
parents 563339ea87aa
children 9eef00a43280
comparison
equal deleted inserted replaced
12204:563339ea87aa 12205:d38e8565ba05
253 int s, int e, int i, int hvt); 253 int s, int e, int i, int hvt);
254 extern void ff_vp8_h_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV, 254 extern void ff_vp8_h_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV,
255 int s, int e, int i, int hvt); 255 int s, int e, int i, int hvt);
256 extern void ff_vp8_h_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV, 256 extern void ff_vp8_h_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV,
257 int s, int e, int i, int hvt); 257 int s, int e, int i, int hvt);
258
259 extern void ff_vp8_v_loop_filter16y_mbedge_mmx (uint8_t *dst, int stride,
260 int e, int i, int hvt);
261 extern void ff_vp8_v_loop_filter16y_mbedge_mmxext(uint8_t *dst, int stride,
262 int e, int i, int hvt);
263 extern void ff_vp8_v_loop_filter16y_mbedge_sse2 (uint8_t *dst, int stride,
264 int e, int i, int hvt);
265 extern void ff_vp8_h_loop_filter16y_mbedge_mmx (uint8_t *dst, int stride,
266 int e, int i, int hvt);
267 extern void ff_vp8_h_loop_filter16y_mbedge_mmxext(uint8_t *dst, int stride,
268 int e, int i, int hvt);
269 extern void ff_vp8_h_loop_filter16y_mbedge_sse2 (uint8_t *dst, int stride,
270 int e, int i, int hvt);
271
272 extern void ff_vp8_v_loop_filter8uv_mbedge_mmx (uint8_t *dstU, uint8_t *dstV,
273 int s, int e, int i, int hvt);
274 extern void ff_vp8_v_loop_filter8uv_mbedge_mmxext(uint8_t *dstU, uint8_t *dstV,
275 int s, int e, int i, int hvt);
276 extern void ff_vp8_v_loop_filter8uv_mbedge_sse2 (uint8_t *dstU, uint8_t *dstV,
277 int s, int e, int i, int hvt);
278 extern void ff_vp8_h_loop_filter8uv_mbedge_mmx (uint8_t *dstU, uint8_t *dstV,
279 int s, int e, int i, int hvt);
280 extern void ff_vp8_h_loop_filter8uv_mbedge_mmxext(uint8_t *dstU, uint8_t *dstV,
281 int s, int e, int i, int hvt);
282 extern void ff_vp8_h_loop_filter8uv_mbedge_sse2 (uint8_t *dstU, uint8_t *dstV,
283 int s, int e, int i, int hvt);
258 #endif 284 #endif
259 285
260 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ 286 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
261 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ 287 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
262 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ 288 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
299 325
300 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; 326 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
301 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; 327 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
302 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx; 328 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
303 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx; 329 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
330
331 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx;
332 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx;
333 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx;
334 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx;
304 } 335 }
305 336
306 /* note that 4-tap width=16 functions are missing because w=16 337 /* note that 4-tap width=16 functions are missing because w=16
307 * is only used for luma, and luma is always a copy or sixtap. */ 338 * is only used for luma, and luma is always a copy or sixtap. */
308 if (mm_flags & FF_MM_MMX2) { 339 if (mm_flags & FF_MM_MMX2) {
319 350
320 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; 351 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
321 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; 352 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
322 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext; 353 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
323 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext; 354 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
355
356 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext;
357 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext;
358 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
359 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
324 } 360 }
325 361
326 if (mm_flags & FF_MM_SSE) { 362 if (mm_flags & FF_MM_SSE) {
327 c->put_vp8_epel_pixels_tab[0][0][0] = 363 c->put_vp8_epel_pixels_tab[0][0][0] =
328 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; 364 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
337 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; 373 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
338 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; 374 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
339 375
340 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; 376 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
341 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2; 377 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
378
379 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2;
380 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
342 } 381 }
343 382
344 if (mm_flags & FF_MM_SSE2) { 383 if (mm_flags & FF_MM_SSE2) {
345 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; 384 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
346 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2; 385 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
386
387 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
388 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
347 } 389 }
348 390
349 if (mm_flags & FF_MM_SSSE3) { 391 if (mm_flags & FF_MM_SSSE3) {
350 VP8_LUMA_MC_FUNC(0, 16, ssse3); 392 VP8_LUMA_MC_FUNC(0, 16, ssse3);
351 VP8_MC_FUNC(1, 8, ssse3); 393 VP8_MC_FUNC(1, 8, ssse3);