comparison x86/vp8dsp.asm @ 12194:80b142c2e9f7 libavcodec

Change the function prototypes of the width=8 inner and mbedge loopfilter functions so that they process both the U and V planes at the same time. This will have speed advantages when using SSE2 (or higher) optimizations, since we can process both the U and V rows together in a single xmm register. This also renames filter16 to filter16y and filter8 to filter8uv so that it's more obvious what each function is used for.
author rbultje
date Mon, 19 Jul 2010 21:18:04 +0000
parents b24153464669
children e7847fcff0f4
comparison
equal deleted inserted replaced
12193:0a63bed2a00e 12194:80b142c2e9f7
1377 ; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, int stride, 1377 ; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, int stride,
1378 ; int flimE, int flimI, int hev_thr); 1378 ; int flimE, int flimI, int hev_thr);
1379 ;----------------------------------------------------------------------------- 1379 ;-----------------------------------------------------------------------------
1380 1380
1381 %macro INNER_LOOPFILTER 4 1381 %macro INNER_LOOPFILTER 4
1382 cglobal vp8_%2_loop_filter16_inner_%1, 5, %3, %4 1382 cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %4
1383 %define dst_reg r0 1383 %define dst_reg r0
1384 %define mstride_reg r1 1384 %define mstride_reg r1
1385 %define E_reg r2 1385 %define E_reg r2
1386 %define I_reg r3 1386 %define I_reg r3
1387 %define hev_thr_reg r4 1387 %define hev_thr_reg r4