changeset 1984:ef919e9ef73e libavcodec

separate out put_signed_pixels_clamped() into its own function and implement an optimized MMX version of the function
author melanson
date Tue, 27 Apr 2004 03:58:06 +0000
parents 1205bf58c420
children b2bc62fdecc0
files dsputil.c dsputil.h i386/dsputil_mmx.c vp3.c
diffstat 4 files changed, 46 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/dsputil.c	Mon Apr 26 21:46:01 2004 +0000
+++ b/dsputil.c	Tue Apr 27 03:58:06 2004 +0000
@@ -332,6 +332,27 @@
     }
 }
 
+/* Write an 8x8 block of signed samples as unsigned bytes: every value is
+ * clamped to [-128, 127] and then biased by +128. Rows are line_size apart. */
+static void put_signed_pixels_clamped_c(const DCTELEM *block,
+                                        uint8_t *restrict pixels,
+                                        int line_size)
+{
+    int row, col;
+
+    for (row = 0; row < 8; row++) {
+        uint8_t *dst = pixels + row * line_size;
+        for (col = 0; col < 8; col++) {
+            int sample = block[row * 8 + col];
+            if (sample < -128)
+                sample = -128;
+            if (sample > 127)
+                sample = 127;
+            dst[col] = (uint8_t)(sample + 128);
+        }
+    }
+}
+
 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                           int line_size)
 {
@@ -3131,6 +3152,7 @@
     c->get_pixels = get_pixels_c;
     c->diff_pixels = diff_pixels_c;
     c->put_pixels_clamped = put_pixels_clamped_c;
+    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
     c->add_pixels_clamped = add_pixels_clamped_c;
     c->gmc1 = gmc1_c;
     c->gmc = gmc_c;
--- a/dsputil.h	Mon Apr 26 21:46:01 2004 +0000
+++ b/dsputil.h	Tue Apr 27 03:58:06 2004 +0000
@@ -137,6 +137,7 @@
     void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
     void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
     void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
     void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
     /**
      * translational global motion compensation.
@@ -374,6 +375,7 @@
 
 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
 
 static inline void emms(void)
 {
--- a/i386/dsputil_mmx.c	Mon Apr 26 21:46:01 2004 +0000
+++ b/i386/dsputil_mmx.c	Tue Apr 27 03:58:06 2004 +0000
@@ -22,6 +22,7 @@
 
 #include "../dsputil.h"
 #include "../simple_idct.h"
+#include "mmx.h"
 
 //#undef NDEBUG
 //#include <assert.h>
@@ -293,6 +294,23 @@
 	    :"memory");
 }
 
+void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+{   /* MMX path for one 8x8 block: each loop pass stores one 8-byte output row. */
+    int i;
+    unsigned char __align8 vector128[8] =   /* eight 0x80 bytes, i.e. the +128 bias */
+      { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
+
+    movq_m2r(*vector128, mm1);              /* bias loaded once and reused for every row */
+    for (i = 0; i < 8; i++) {
+        movq_m2r(*(block), mm0);            /* first 8 bytes of the row (4 values if DCTELEM is 16-bit -- TODO confirm) */
+        packsswb_m2r(*(block + 4), mm0);    /* presumably packs in the next 4 values with signed saturation -- confirm in mmx.h */
+        block += 8;                         /* advance to the next row of 8 input values */
+        paddb_r2r(mm1, mm0);                /* presumably a per-byte add of the 0x80 bias -- confirm in mmx.h */
+        movq_r2m(mm0, *pixels);             /* store the 8 result bytes at the current output row */
+        pixels += line_size;                /* step to the next output row */
+    }
+}   /* NOTE(review): no emms() is called here; presumably left to the caller -- confirm. */
+
 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
 {
     const DCTELEM *p;
@@ -2160,6 +2178,7 @@
         c->diff_pixels = diff_pixels_mmx;
 #endif //CONFIG_ENCODERS
         c->put_pixels_clamped = put_pixels_clamped_mmx;
+        c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
         c->add_pixels_clamped = add_pixels_clamped_mmx;
         c->clear_blocks = clear_blocks_mmx;
 #ifdef CONFIG_ENCODERS
--- a/vp3.c	Mon Apr 26 21:46:01 2004 +0000
+++ b/vp3.c	Tue Apr 27 03:58:06 2004 +0000
@@ -2061,10 +2061,6 @@
     int motion_halfpel_index;
     uint8_t *motion_source;
 
-    int16_t *op;
-    uint8_t *dest;
-    int j, k;
-
     debug_vp3("  vp3: rendering final fragments for %s\n",
         (plane == 0) ? "Y plane" : (plane == 1) ? "U plane" : "V plane");
 
@@ -2186,22 +2182,9 @@
                     s->all_fragments[i].coeff_count,
                     output_samples);
                 if (s->all_fragments[i].coding_method == MODE_INTRA) {
-                    /* this really needs to be optimized sooner or later */
-                    op = output_samples;
-                    dest = output_plane + s->all_fragments[i].first_pixel;
-                    for (j = 0; j < 8; j++) {
-                        for (k = 0; k < 8; k++) {
-                            if (*op < -128)
-                                *dest = 0;
-                            else if (*op > 127)
-                                *dest = 255;
-                            else
-                                *dest = (uint8_t)(*op + 128);
-                            op++;
-                            dest++;
-                        }
-                        dest += (stride - 8);
-                    }
+                    s->dsp.put_signed_pixels_clamped(output_samples,
+                        output_plane + s->all_fragments[i].first_pixel,
+                        stride);
                 } else {
                     s->dsp.add_pixels_clamped(output_samples,
                         output_plane + s->all_fragments[i].first_pixel,