Mercurial > libavcodec.hg
changeset 12275:709d5848abf8 libavcodec
Save a register (or, on x86-32, one register's worth of stack space) in the
no-loop mbedge loopfilter functions by reusing the space of a variable
that is no longer needed.
author | rbultje |
---|---|
date | Mon, 26 Jul 2010 14:00:15 +0000 |
parents | 1d207bb5cd29 |
children | 1c299b8f2930 |
files | x86/vp8dsp.asm |
diffstat | 1 files changed, 24 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/x86/vp8dsp.asm Mon Jul 26 13:56:51 2010 +0000
+++ b/x86/vp8dsp.asm Mon Jul 26 14:00:15 2010 +0000
@@ -2200,11 +2200,15 @@
     ; align stack
     mov stack_reg, rsp ; backup stack pointer
     and rsp, ~(mmsize-1) ; align stack
+%ifidn %2, sse2
+    sub rsp, mmsize * 7
+%else
     sub rsp, mmsize * 8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr
                         ; [3]=hev() result
                         ; [4]=filter tmp result
                         ; [5]/[6] = p2/q2 backup
                         ; [7]=lim_res sign result
+%endif

 %define flim_E [rsp]
 %define flim_I [rsp+mmsize]
@@ -2215,7 +2219,11 @@
 %define q0backup [rsp+mmsize*4]
 %define p2backup [rsp+mmsize*5]
 %define q2backup [rsp+mmsize*6]
+%ifidn %2, sse2
+%define lim_sign [rsp]
+%else
 %define lim_sign [rsp+mmsize*7]
+%endif

     mova flim_E, m0
     mova flim_I, m1
@@ -2232,7 +2240,7 @@
 %define q0backup m8
 %define p2backup m13
 %define q2backup m14
-%define lim_sign m15
+%define lim_sign m9

     ; splat function arguments
     SPLATB_REG flim_E, E_reg, m7 ; E
@@ -2638,8 +2646,8 @@
     pmullw m1, [pw_9]
     paddw m6, m7
     paddw m1, m7
-%ifdef m15
-    SWAP 7, 15
+%ifdef m9
+    SWAP 7, 9
 %else
     mova m7, lim_sign
 %endif
@@ -2749,29 +2757,29 @@
 INIT_XMM
 %define SPLATB_REG SPLATB_REG_SSE2
 %define WRITE_8W WRITE_8W_SSE2
-MBEDGE_LOOPFILTER sse2, v, 5, 16, 16
+MBEDGE_LOOPFILTER sse2, v, 5, 16, 15
 %ifdef m8
-MBEDGE_LOOPFILTER sse2, h, 5, 16, 16
+MBEDGE_LOOPFILTER sse2, h, 5, 16, 15
 %else
-MBEDGE_LOOPFILTER sse2, h, 6, 16, 16
+MBEDGE_LOOPFILTER sse2, h, 6, 16, 15
 %endif
-MBEDGE_LOOPFILTER sse2, v, 6, 8, 16
-MBEDGE_LOOPFILTER sse2, h, 6, 8, 16
+MBEDGE_LOOPFILTER sse2, v, 6, 8, 15
+MBEDGE_LOOPFILTER sse2, h, 6, 8, 15

 %define SPLATB_REG SPLATB_REG_SSSE3
-MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16
+MBEDGE_LOOPFILTER ssse3, v, 5, 16, 15
 %ifdef m8
-MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16
+MBEDGE_LOOPFILTER ssse3, h, 5, 16, 15
 %else
-MBEDGE_LOOPFILTER ssse3, h, 6, 16, 16
+MBEDGE_LOOPFILTER ssse3, h, 6, 16, 15
 %endif
-MBEDGE_LOOPFILTER ssse3, v, 6, 8, 16
-MBEDGE_LOOPFILTER ssse3, h, 6, 8, 16
+MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15
+MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15

 %define WRITE_8W WRITE_8W_SSE4
 %ifdef m8
-MBEDGE_LOOPFILTER sse4, h, 5, 16, 16
+MBEDGE_LOOPFILTER sse4, h, 5, 16, 15
 %else
-MBEDGE_LOOPFILTER sse4, h, 6, 16, 16
+MBEDGE_LOOPFILTER sse4, h, 6, 16, 15
 %endif
-MBEDGE_LOOPFILTER sse4, h, 6, 8, 16
+MBEDGE_LOOPFILTER sse4, h, 6, 8, 15
```