comparison x86/vp8dsp.asm @ 12275:709d5848abf8 libavcodec

Save a register (or, on x86-32, one register-sized slot of stack space, i.e. mmsize bytes) in the no-loop mbedge loopfilter functions by reusing the space that holds a variable which is no longer needed at that point.
author rbultje
date Mon, 26 Jul 2010 14:00:15 +0000
parents 1d207bb5cd29
children 1c299b8f2930
comparison
equal deleted inserted replaced
12274:1d207bb5cd29 12275:709d5848abf8
2198 SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh 2198 SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh
2199 2199
2200 ; align stack 2200 ; align stack
2201 mov stack_reg, rsp ; backup stack pointer 2201 mov stack_reg, rsp ; backup stack pointer
2202 and rsp, ~(mmsize-1) ; align stack 2202 and rsp, ~(mmsize-1) ; align stack
2203 %ifidn %2, sse2
2204 sub rsp, mmsize * 7
2205 %else
2203 sub rsp, mmsize * 8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr 2206 sub rsp, mmsize * 8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr
2204 ; [3]=hev() result 2207 ; [3]=hev() result
2205 ; [4]=filter tmp result 2208 ; [4]=filter tmp result
2206 ; [5]/[6] = p2/q2 backup 2209 ; [5]/[6] = p2/q2 backup
2207 ; [7]=lim_res sign result 2210 ; [7]=lim_res sign result
2211 %endif
2208 2212
2209 %define flim_E [rsp] 2213 %define flim_E [rsp]
2210 %define flim_I [rsp+mmsize] 2214 %define flim_I [rsp+mmsize]
2211 %define hev_thr [rsp+mmsize*2] 2215 %define hev_thr [rsp+mmsize*2]
2212 %define mask_res [rsp+mmsize*3] 2216 %define mask_res [rsp+mmsize*3]
2213 %define lim_res [rsp+mmsize*4] 2217 %define lim_res [rsp+mmsize*4]
2214 %define p0backup [rsp+mmsize*3] 2218 %define p0backup [rsp+mmsize*3]
2215 %define q0backup [rsp+mmsize*4] 2219 %define q0backup [rsp+mmsize*4]
2216 %define p2backup [rsp+mmsize*5] 2220 %define p2backup [rsp+mmsize*5]
2217 %define q2backup [rsp+mmsize*6] 2221 %define q2backup [rsp+mmsize*6]
2222 %ifidn %2, sse2
2223 %define lim_sign [rsp]
2224 %else
2218 %define lim_sign [rsp+mmsize*7] 2225 %define lim_sign [rsp+mmsize*7]
2226 %endif
2219 2227
2220 mova flim_E, m0 2228 mova flim_E, m0
2221 mova flim_I, m1 2229 mova flim_I, m1
2222 mova hev_thr, m2 2230 mova hev_thr, m2
2223 2231
2230 %define lim_res m8 2238 %define lim_res m8
2231 %define p0backup m12 2239 %define p0backup m12
2232 %define q0backup m8 2240 %define q0backup m8
2233 %define p2backup m13 2241 %define p2backup m13
2234 %define q2backup m14 2242 %define q2backup m14
2235 %define lim_sign m15 2243 %define lim_sign m9
2236 2244
2237 ; splat function arguments 2245 ; splat function arguments
2238 SPLATB_REG flim_E, E_reg, m7 ; E 2246 SPLATB_REG flim_E, E_reg, m7 ; E
2239 SPLATB_REG flim_I, I_reg, m7 ; I 2247 SPLATB_REG flim_I, I_reg, m7 ; I
2240 SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh 2248 SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh
2636 %endif 2644 %endif
2637 pmullw m6, [pw_9] 2645 pmullw m6, [pw_9]
2638 pmullw m1, [pw_9] 2646 pmullw m1, [pw_9]
2639 paddw m6, m7 2647 paddw m6, m7
2640 paddw m1, m7 2648 paddw m1, m7
2641 %ifdef m15 2649 %ifdef m9
2642 SWAP 7, 15 2650 SWAP 7, 9
2643 %else 2651 %else
2644 mova m7, lim_sign 2652 mova m7, lim_sign
2645 %endif 2653 %endif
2646 psraw m6, 7 2654 psraw m6, 7
2647 psraw m1, 7 2655 psraw m1, 7
2747 MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0 2755 MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0
2748 2756
2749 INIT_XMM 2757 INIT_XMM
2750 %define SPLATB_REG SPLATB_REG_SSE2 2758 %define SPLATB_REG SPLATB_REG_SSE2
2751 %define WRITE_8W WRITE_8W_SSE2 2759 %define WRITE_8W WRITE_8W_SSE2
2752 MBEDGE_LOOPFILTER sse2, v, 5, 16, 16 2760 MBEDGE_LOOPFILTER sse2, v, 5, 16, 15
2753 %ifdef m8 2761 %ifdef m8
2754 MBEDGE_LOOPFILTER sse2, h, 5, 16, 16 2762 MBEDGE_LOOPFILTER sse2, h, 5, 16, 15
2755 %else 2763 %else
2756 MBEDGE_LOOPFILTER sse2, h, 6, 16, 16 2764 MBEDGE_LOOPFILTER sse2, h, 6, 16, 15
2757 %endif 2765 %endif
2758 MBEDGE_LOOPFILTER sse2, v, 6, 8, 16 2766 MBEDGE_LOOPFILTER sse2, v, 6, 8, 15
2759 MBEDGE_LOOPFILTER sse2, h, 6, 8, 16 2767 MBEDGE_LOOPFILTER sse2, h, 6, 8, 15
2760 2768
2761 %define SPLATB_REG SPLATB_REG_SSSE3 2769 %define SPLATB_REG SPLATB_REG_SSSE3
2762 MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16 2770 MBEDGE_LOOPFILTER ssse3, v, 5, 16, 15
2763 %ifdef m8 2771 %ifdef m8
2764 MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16 2772 MBEDGE_LOOPFILTER ssse3, h, 5, 16, 15
2765 %else 2773 %else
2766 MBEDGE_LOOPFILTER ssse3, h, 6, 16, 16 2774 MBEDGE_LOOPFILTER ssse3, h, 6, 16, 15
2767 %endif 2775 %endif
2768 MBEDGE_LOOPFILTER ssse3, v, 6, 8, 16 2776 MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15
2769 MBEDGE_LOOPFILTER ssse3, h, 6, 8, 16 2777 MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15
2770 2778
2771 %define WRITE_8W WRITE_8W_SSE4 2779 %define WRITE_8W WRITE_8W_SSE4
2772 %ifdef m8 2780 %ifdef m8
2773 MBEDGE_LOOPFILTER sse4, h, 5, 16, 16 2781 MBEDGE_LOOPFILTER sse4, h, 5, 16, 15
2774 %else 2782 %else
2775 MBEDGE_LOOPFILTER sse4, h, 6, 16, 16 2783 MBEDGE_LOOPFILTER sse4, h, 6, 16, 15
2776 %endif 2784 %endif
2777 MBEDGE_LOOPFILTER sse4, h, 6, 8, 16 2785 MBEDGE_LOOPFILTER sse4, h, 6, 8, 15