Mercurial > mplayer.hg
changeset 35585:7d8f561558fc
Reduce register usage in an asm block.
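Reduce the asm block of render_frame_yuv422_sse4 so that it requires only
4 general-purpose registers: src_y, src_u and src_v are no longer forced
into registers ("r") but passed as "g" operands and loaded one at a time
through a single scratch register (REG_S).
After this modification, the function is only ~3.4x faster than
render_frame_yuv422.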
author | upsuper |
---|---|
date | Fri, 14 Dec 2012 02:16:36 +0000 |
parents | fb92c6de7f70 |
children | be4dcfb3620c |
files | libmpcodecs/vf_ass.c |
diffstat | 1 files changed, 10 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/libmpcodecs/vf_ass.c Fri Dec 14 02:16:30 2012 +0000
+++ b/libmpcodecs/vf_ass.c Fri Dec 14 02:16:36 2012 +0000
@@ -274,9 +274,12 @@
  "psrlw $8, %%xmm3 \n\t"
  "packuswb %%xmm7, %%xmm1 \n\t"
  "packuswb %%xmm7, %%xmm3 \n\t"
- "movq (%[src_y], %[j], 1), %%xmm4 \n\t"
- "movq (%[src_u], %[j], 1), %%xmm5 \n\t"
- "movq (%[src_v], %[j], 1), %%xmm6 \n\t"
+ "mov %[src_y], %%"REG_S" \n\t"
+ "movq (%%"REG_S", %[j], 1), %%xmm4 \n\t"
+ "mov %[src_u], %%"REG_S" \n\t"
+ "movq (%%"REG_S", %[j], 1), %%xmm5 \n\t"
+ "mov %[src_v], %%"REG_S" \n\t"
+ "movq (%%"REG_S", %[j], 1), %%xmm6 \n\t"
  "packuswb %%xmm7, %%xmm5 \n\t"
  "packuswb %%xmm7, %%xmm6 \n\t"
  "punpcklbw %%xmm6, %%xmm5 \n\t"
@@ -302,12 +305,13 @@
  :
  : [dst] "r" (dst + i * stride),
  [alpha] "r" (alpha + i * outw),
- [src_y] "r" (src_y + i * outw),
- [src_u] "r" (src_u + i * outw),
- [src_v] "r" (src_v + i * outw),
+ [src_y] "g" (src_y + i * outw),
+ [src_u] "g" (src_u + i * outw),
+ [src_v] "g" (src_v + i * outw),
  [j] "r" (xmin),
  [xmax] "g" (xmax),
  [f] "g" (is_uyvy)
+ : REG_S
  );
  }
  }
```