changeset 35585:7d8f561558fc

Reduce register usage in an asm block. Reduce the asm block of render_frame_yuv422_sse4 to 4 general-purpose registers (three "r" operands plus the REG_S scratch register). After this modification, the function is only ~3.4x faster than render_frame_yuv422.
author upsuper
date Fri, 14 Dec 2012 02:16:36 +0000
parents fb92c6de7f70
children be4dcfb3620c
files libmpcodecs/vf_ass.c
diffstat 1 files changed, 10 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/libmpcodecs/vf_ass.c	Fri Dec 14 02:16:30 2012 +0000
+++ b/libmpcodecs/vf_ass.c	Fri Dec 14 02:16:36 2012 +0000
@@ -274,9 +274,12 @@
                 "psrlw      $8, %%xmm3 \n\t"
                 "packuswb   %%xmm7, %%xmm1 \n\t"
                 "packuswb   %%xmm7, %%xmm3 \n\t"
-                "movq       (%[src_y], %[j], 1),    %%xmm4 \n\t"
-                "movq       (%[src_u], %[j], 1),    %%xmm5 \n\t"
-                "movq       (%[src_v], %[j], 1),    %%xmm6 \n\t"
+                "mov        %[src_y],   %%"REG_S" \n\t"
+                "movq       (%%"REG_S", %[j], 1),   %%xmm4 \n\t"
+                "mov        %[src_u],   %%"REG_S" \n\t"
+                "movq       (%%"REG_S", %[j], 1),   %%xmm5 \n\t"
+                "mov        %[src_v],   %%"REG_S" \n\t"
+                "movq       (%%"REG_S", %[j], 1),   %%xmm6 \n\t"
                 "packuswb   %%xmm7, %%xmm5 \n\t"
                 "packuswb   %%xmm7, %%xmm6 \n\t"
                 "punpcklbw  %%xmm6, %%xmm5 \n\t"
@@ -302,12 +305,13 @@
 
                 : : [dst]   "r" (dst + i * stride),
                     [alpha] "r" (alpha + i * outw),
-                    [src_y] "r" (src_y + i * outw),
-                    [src_u] "r" (src_u + i * outw),
-                    [src_v] "r" (src_v + i * outw),
+                    [src_y] "g" (src_y + i * outw),
+                    [src_u] "g" (src_u + i * outw),
+                    [src_v] "g" (src_v + i * outw),
                     [j]     "r" (xmin),
                     [xmax]  "g" (xmax),
                     [f]     "g" (is_uyvy)
+                : REG_S
         );
     }
 }