Mercurial > libavcodec.hg
comparison x86/vp8dsp.asm @ 12275:709d5848abf8 libavcodec
Save a register (or one register-sized slot of stack space on x86-32) in the
no-loop mbedge loopfilter functions by re-using the space that holds a
variable that is no longer needed.
author | rbultje |
---|---|
date | Mon, 26 Jul 2010 14:00:15 +0000 |
parents | 1d207bb5cd29 |
children | 1c299b8f2930 |
comparison
equal
deleted
inserted
replaced
12274:1d207bb5cd29 | 12275:709d5848abf8 |
---|---|
2198 SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh | 2198 SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh |
2199 | 2199 |
2200 ; align stack | 2200 ; align stack |
2201 mov stack_reg, rsp ; backup stack pointer | 2201 mov stack_reg, rsp ; backup stack pointer |
2202 and rsp, ~(mmsize-1) ; align stack | 2202 and rsp, ~(mmsize-1) ; align stack |
2203 %ifidn %2, sse2 | |
2204 sub rsp, mmsize * 7 | |
2205 %else | |
2203 sub rsp, mmsize * 8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr | 2206 sub rsp, mmsize * 8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr |
2204 ; [3]=hev() result | 2207 ; [3]=hev() result |
2205 ; [4]=filter tmp result | 2208 ; [4]=filter tmp result |
2206 ; [5]/[6] = p2/q2 backup | 2209 ; [5]/[6] = p2/q2 backup |
2207 ; [7]=lim_res sign result | 2210 ; [7]=lim_res sign result |
2211 %endif | |
2208 | 2212 |
2209 %define flim_E [rsp] | 2213 %define flim_E [rsp] |
2210 %define flim_I [rsp+mmsize] | 2214 %define flim_I [rsp+mmsize] |
2211 %define hev_thr [rsp+mmsize*2] | 2215 %define hev_thr [rsp+mmsize*2] |
2212 %define mask_res [rsp+mmsize*3] | 2216 %define mask_res [rsp+mmsize*3] |
2213 %define lim_res [rsp+mmsize*4] | 2217 %define lim_res [rsp+mmsize*4] |
2214 %define p0backup [rsp+mmsize*3] | 2218 %define p0backup [rsp+mmsize*3] |
2215 %define q0backup [rsp+mmsize*4] | 2219 %define q0backup [rsp+mmsize*4] |
2216 %define p2backup [rsp+mmsize*5] | 2220 %define p2backup [rsp+mmsize*5] |
2217 %define q2backup [rsp+mmsize*6] | 2221 %define q2backup [rsp+mmsize*6] |
2222 %ifidn %2, sse2 | |
2223 %define lim_sign [rsp] | |
2224 %else | |
2218 %define lim_sign [rsp+mmsize*7] | 2225 %define lim_sign [rsp+mmsize*7] |
2226 %endif | |
2219 | 2227 |
2220 mova flim_E, m0 | 2228 mova flim_E, m0 |
2221 mova flim_I, m1 | 2229 mova flim_I, m1 |
2222 mova hev_thr, m2 | 2230 mova hev_thr, m2 |
2223 | 2231 |
2230 %define lim_res m8 | 2238 %define lim_res m8 |
2231 %define p0backup m12 | 2239 %define p0backup m12 |
2232 %define q0backup m8 | 2240 %define q0backup m8 |
2233 %define p2backup m13 | 2241 %define p2backup m13 |
2234 %define q2backup m14 | 2242 %define q2backup m14 |
2235 %define lim_sign m15 | 2243 %define lim_sign m9 |
2236 | 2244 |
2237 ; splat function arguments | 2245 ; splat function arguments |
2238 SPLATB_REG flim_E, E_reg, m7 ; E | 2246 SPLATB_REG flim_E, E_reg, m7 ; E |
2239 SPLATB_REG flim_I, I_reg, m7 ; I | 2247 SPLATB_REG flim_I, I_reg, m7 ; I |
2240 SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh | 2248 SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh |
2636 %endif | 2644 %endif |
2637 pmullw m6, [pw_9] | 2645 pmullw m6, [pw_9] |
2638 pmullw m1, [pw_9] | 2646 pmullw m1, [pw_9] |
2639 paddw m6, m7 | 2647 paddw m6, m7 |
2640 paddw m1, m7 | 2648 paddw m1, m7 |
2641 %ifdef m15 | 2649 %ifdef m9 |
2642 SWAP 7, 15 | 2650 SWAP 7, 9 |
2643 %else | 2651 %else |
2644 mova m7, lim_sign | 2652 mova m7, lim_sign |
2645 %endif | 2653 %endif |
2646 psraw m6, 7 | 2654 psraw m6, 7 |
2647 psraw m1, 7 | 2655 psraw m1, 7 |
2747 MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0 | 2755 MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0 |
2748 | 2756 |
2749 INIT_XMM | 2757 INIT_XMM |
2750 %define SPLATB_REG SPLATB_REG_SSE2 | 2758 %define SPLATB_REG SPLATB_REG_SSE2 |
2751 %define WRITE_8W WRITE_8W_SSE2 | 2759 %define WRITE_8W WRITE_8W_SSE2 |
2752 MBEDGE_LOOPFILTER sse2, v, 5, 16, 16 | 2760 MBEDGE_LOOPFILTER sse2, v, 5, 16, 15 |
2753 %ifdef m8 | 2761 %ifdef m8 |
2754 MBEDGE_LOOPFILTER sse2, h, 5, 16, 16 | 2762 MBEDGE_LOOPFILTER sse2, h, 5, 16, 15 |
2755 %else | 2763 %else |
2756 MBEDGE_LOOPFILTER sse2, h, 6, 16, 16 | 2764 MBEDGE_LOOPFILTER sse2, h, 6, 16, 15 |
2757 %endif | 2765 %endif |
2758 MBEDGE_LOOPFILTER sse2, v, 6, 8, 16 | 2766 MBEDGE_LOOPFILTER sse2, v, 6, 8, 15 |
2759 MBEDGE_LOOPFILTER sse2, h, 6, 8, 16 | 2767 MBEDGE_LOOPFILTER sse2, h, 6, 8, 15 |
2760 | 2768 |
2761 %define SPLATB_REG SPLATB_REG_SSSE3 | 2769 %define SPLATB_REG SPLATB_REG_SSSE3 |
2762 MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16 | 2770 MBEDGE_LOOPFILTER ssse3, v, 5, 16, 15 |
2763 %ifdef m8 | 2771 %ifdef m8 |
2764 MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16 | 2772 MBEDGE_LOOPFILTER ssse3, h, 5, 16, 15 |
2765 %else | 2773 %else |
2766 MBEDGE_LOOPFILTER ssse3, h, 6, 16, 16 | 2774 MBEDGE_LOOPFILTER ssse3, h, 6, 16, 15 |
2767 %endif | 2775 %endif |
2768 MBEDGE_LOOPFILTER ssse3, v, 6, 8, 16 | 2776 MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15 |
2769 MBEDGE_LOOPFILTER ssse3, h, 6, 8, 16 | 2777 MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15 |
2770 | 2778 |
2771 %define WRITE_8W WRITE_8W_SSE4 | 2779 %define WRITE_8W WRITE_8W_SSE4 |
2772 %ifdef m8 | 2780 %ifdef m8 |
2773 MBEDGE_LOOPFILTER sse4, h, 5, 16, 16 | 2781 MBEDGE_LOOPFILTER sse4, h, 5, 16, 15 |
2774 %else | 2782 %else |
2775 MBEDGE_LOOPFILTER sse4, h, 6, 16, 16 | 2783 MBEDGE_LOOPFILTER sse4, h, 6, 16, 15 |
2776 %endif | 2784 %endif |
2777 MBEDGE_LOOPFILTER sse4, h, 6, 8, 16 | 2785 MBEDGE_LOOPFILTER sse4, h, 6, 8, 15 |