diff i386/dsputil_mmx.c @ 8288:800444234375 libavcodec

clear_block mmx
author lorenm
date Wed, 10 Dec 2008 21:35:17 +0000
parents cf4d575b1982
children 08b0f63a91c5
line wrap: on
line diff
--- a/i386/dsputil_mmx.c	Wed Dec 10 21:26:00 2008 +0000
+++ b/i386/dsputil_mmx.c	Wed Dec 10 21:35:17 2008 +0000
@@ -464,21 +464,42 @@
         );
 }
 
-static void clear_blocks_mmx(DCTELEM *blocks)
+#define CLEAR_BLOCKS(name,n) \
+static void name(DCTELEM *blocks)\
+{\
+    __asm__ volatile(\
+                "pxor %%mm7, %%mm7              \n\t"\
+                "mov     %1, %%"REG_a"          \n\t"\
+                "1:                             \n\t"\
+                "movq %%mm7, (%0, %%"REG_a")    \n\t"\
+                "movq %%mm7, 8(%0, %%"REG_a")   \n\t"\
+                "movq %%mm7, 16(%0, %%"REG_a")  \n\t"\
+                "movq %%mm7, 24(%0, %%"REG_a")  \n\t"\
+                "add $32, %%"REG_a"             \n\t"\
+                " js 1b                         \n\t"\
+                : : "r" (((uint8_t *)blocks)+128*n),\
+                    "i" (-128*n)\
+                : "%"REG_a\
+        );\
+}
+CLEAR_BLOCKS(clear_blocks_mmx, 6)
+CLEAR_BLOCKS(clear_block_mmx, 1)
+
+static void clear_block_sse(DCTELEM *block)
 {
     __asm__ volatile(
-                "pxor %%mm7, %%mm7              \n\t"
-                "mov $-128*6, %%"REG_a"         \n\t"
-                "1:                             \n\t"
-                "movq %%mm7, (%0, %%"REG_a")    \n\t"
-                "movq %%mm7, 8(%0, %%"REG_a")   \n\t"
-                "movq %%mm7, 16(%0, %%"REG_a")  \n\t"
-                "movq %%mm7, 24(%0, %%"REG_a")  \n\t"
-                "add $32, %%"REG_a"             \n\t"
-                " js 1b                         \n\t"
-                : : "r" (((uint8_t *)blocks)+128*6)
-                : "%"REG_a
-        );
+        "xorps  %%xmm0, %%xmm0  \n"
+        "movaps %%xmm0,    (%0) \n"
+        "movaps %%xmm0,  16(%0) \n"
+        "movaps %%xmm0,  32(%0) \n"
+        "movaps %%xmm0,  48(%0) \n"
+        "movaps %%xmm0,  64(%0) \n"
+        "movaps %%xmm0,  80(%0) \n"
+        "movaps %%xmm0,  96(%0) \n"
+        "movaps %%xmm0, 112(%0) \n"
+        :: "r"(block)
+        : "memory"
+    );
 }
 
 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
@@ -2569,7 +2590,10 @@
         c->put_pixels_clamped = put_pixels_clamped_mmx;
         c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
         c->add_pixels_clamped = add_pixels_clamped_mmx;
+        c->clear_block  = clear_block_mmx;
         c->clear_blocks = clear_blocks_mmx;
+        if (mm_flags & FF_MM_SSE)
+            c->clear_block = clear_block_sse;
 
 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
         c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \