changeset 3536:545a15c19c91 libavcodec

sse & sse2 implementations of vorbis channel coupling. 9% faster vorbis (on a K8).
author lorenm
date Thu, 03 Aug 2006 03:18:47 +0000
parents a14c98a0ca3d
children f52e3f60481b
files dsputil.c dsputil.h i386/dsputil_mmx.c vorbis.c vorbis.h
diffstat 5 files changed, 95 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/dsputil.c	Thu Aug 03 02:18:07 2006 +0000
+++ b/dsputil.c	Thu Aug 03 03:18:47 2006 +0000
@@ -35,6 +35,9 @@
 /* snow.c */
 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
 
+/* vorbis.c */
+void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
+
 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
 uint32_t squareTbl[512] = {0, };
 
@@ -4090,6 +4093,10 @@
     c->inner_add_yblock = ff_snow_inner_add_yblock;
 #endif
 
+#ifdef CONFIG_VORBIS_DECODER
+    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
+#endif
+
     c->shrink[0]= ff_img_copy_plane;
     c->shrink[1]= ff_shrink22;
     c->shrink[2]= ff_shrink44;
--- a/dsputil.h	Thu Aug 03 02:18:07 2006 +0000
+++ b/dsputil.h	Thu Aug 03 03:18:47 2006 +0000
@@ -307,6 +307,8 @@
 
     void (*h261_loop_filter)(uint8_t *src, int stride);
 
+    void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
+
     /* (I)DCT */
     void (*fdct)(DCTELEM *block/* align 16*/);
     void (*fdct248)(DCTELEM *block/* align 16*/);
--- a/i386/dsputil_mmx.c	Thu Aug 03 02:18:07 2006 +0000
+++ b/i386/dsputil_mmx.c	Thu Aug 03 03:18:47 2006 +0000
@@ -2711,6 +2711,59 @@
 }
 #endif
 
+/* Inverse coupling for CPUs with SSE but no SSE2 (installed on MM_SSE): uses
+ * SSE1 opcodes only. Needs blocksize % 4 == 0 and 16-byte aligned mag/ang. */
+static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
+{
+    static const uint32_t sign[4] __attribute__((aligned(16))) = {1U<<31, 1U<<31, 1U<<31, 1U<<31};
+    int i;
+    for(i=0; i<blocksize; i+=4) {
+        asm volatile(
+            "movaps  %0,     %%xmm0 \n\t" // xmm0 = m (4 floats)
+            "movaps  %1,     %%xmm1 \n\t" // xmm1 = a (4 floats)
+            "xorps   %%xmm2, %%xmm2 \n\t"
+            "xorps   %%xmm3, %%xmm3 \n\t"
+            "cmpltps %%xmm0, %%xmm2 \n\t" // mask: 0.0 <  m (strict, as in the C code)
+            "cmpleps %%xmm1, %%xmm3 \n\t" // mask: 0.0 <= a
+            "andps   %2,     %%xmm2 \n\t" // keep only the sign bit (no pslld in SSE1)
+            "xorps   %%xmm2, %%xmm1 \n\t" // a' = (m > 0) ? -a : a
+            "movaps  %%xmm3, %%xmm4 \n\t"
+            "andps   %%xmm1, %%xmm3 \n\t" // (a >= 0) ? a' : 0
+            "andnps  %%xmm1, %%xmm4 \n\t" // (a <  0) ? a' : 0
+            "addps   %%xmm0, %%xmm3 \n\t" // new a = m + ((a >= 0) ? a' : 0)
+            "subps   %%xmm4, %%xmm0 \n\t" // new m = m - ((a <  0) ? a' : 0)
+            "movaps  %%xmm3, %1     \n\t"
+            "movaps  %%xmm0, %0     \n\t"
+            :"+m"(mag[i]), "+m"(ang[i]) :"m"(sign[0]) :"memory"
+        );
+    }
+}
+/* SSE2 inverse coupling: needs blocksize % 4 == 0, 16-byte aligned mag/ang. */
+static void vorbis_inverse_coupling_sse2(float *mag, float *ang, int blocksize)
+{
+    int i;
+    for(i=0; i<blocksize; i+=4) {
+        asm volatile(
+            "movaps  %0,     %%xmm0 \n\t" // xmm0 = m (4 floats)
+            "movaps  %1,     %%xmm1 \n\t" // xmm1 = a (4 floats)
+            "pxor    %%xmm2, %%xmm2 \n\t"
+            "pxor    %%xmm3, %%xmm3 \n\t"
+            "cmpltps %%xmm0, %%xmm2 \n\t" // mask: 0.0 <  m (strict, as in the C code)
+            "cmpleps %%xmm1, %%xmm3 \n\t" // mask: 0.0 <= a
+            "pslld   $31,    %%xmm2 \n\t" // keep only the sign bit
+            "pxor    %%xmm2, %%xmm1 \n\t" // a' = (m > 0) ? -a : a
+            "movaps  %%xmm3, %%xmm4 \n\t"
+            "pand    %%xmm1, %%xmm3 \n\t" // (a >= 0) ? a' : 0
+            "pandn   %%xmm1, %%xmm4 \n\t" // (a <  0) ? a' : 0
+            "addps   %%xmm0, %%xmm3 \n\t" // new a = m + ((a >= 0) ? a' : 0)
+            "subps   %%xmm4, %%xmm0 \n\t" // new m = m - ((a <  0) ? a' : 0)
+            "movaps  %%xmm3, %1     \n\t"
+            "movaps  %%xmm0, %0     \n\t"
+            :"+m"(mag[i]), "+m"(ang[i]) : :"memory"
+        );
+    }
+}
+
 #ifdef CONFIG_SNOW_ENCODER
 extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width);
 extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width);
@@ -3137,6 +3190,11 @@
             c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
         }
 #endif
+
+        if(mm_flags & MM_SSE2)
+            c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse2;
+        else if(mm_flags & MM_SSE)
+            c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
     }
 
 #ifdef CONFIG_ENCODERS
--- a/vorbis.c	Thu Aug 03 02:18:07 2006 +0000
+++ b/vorbis.c	Thu Aug 03 03:18:47 2006 +0000
@@ -929,6 +929,7 @@
     int i, j, hdr_type;
 
     vc->avccontext = avccontext;
+    dsputil_init(&vc->dsp, avccontext);
 
     if (!headers_len) {
         av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n");
@@ -1443,6 +1444,31 @@
     return 0;
 }
 
+/**
+ * Reference C inverse channel coupling, the default installed by dsputil.
+ * Rewrites each (mag[i], ang[i]) residue pair in place.
+ * @param mag       magnitude values, blocksize floats, updated in place
+ * @param ang       angle values, blocksize floats, updated in place
+ * @param blocksize number of pairs to process
+ */
+void vorbis_inverse_coupling(float *mag, float *ang, int blocksize)
+{
+    int n;
+
+    for (n = 0; n < blocksize; n++) {
+        const float m = mag[n];
+        const float a = ang[n];
+        if (a > 0.0) {
+            /* positive angle: mag[n] is kept, only ang[n] changes */
+            ang[n] = (m > 0.0) ? m - a : a + m;
+        } else {
+            /* otherwise ang[n] receives m and mag[n] absorbs the angle */
+            ang[n] = m;
+            mag[n] = (m > 0.0) ? m + a : m - a;
+        }
+    }
+}
+
 // Decode the audio packet using the functions above
 #define BIAS 385
 
@@ -1541,26 +1567,7 @@
 
         mag=vc->channel_residues+res_chan[mapping->magnitude[i]]*blocksize/2;
         ang=vc->channel_residues+res_chan[mapping->angle[i]]*blocksize/2;
-        for(j=0;j<blocksize/2;++j) {
-            float temp;
-            if (mag[j]>0.0) {
-                if (ang[j]>0.0) {
-                    ang[j]=mag[j]-ang[j];
-                } else {
-                    temp=ang[j];
-                    ang[j]=mag[j];
-                    mag[j]+=temp;
-                }
-            } else {
-                if (ang[j]>0.0) {
-                    ang[j]+=mag[j];
-                } else {
-                    temp=ang[j];
-                    ang[j]=mag[j];
-                    mag[j]-=temp;
-                }
-            }
-        }
+        vc->dsp.vorbis_inverse_coupling(mag, ang, blocksize/2);
     }
 
 // Dotproduct
--- a/vorbis.h	Thu Aug 03 02:18:07 2006 +0000
+++ b/vorbis.h	Thu Aug 03 03:18:47 2006 +0000
@@ -87,6 +87,7 @@
 typedef struct vorbis_context_s {
     AVCodecContext *avccontext;
     GetBitContext gb;
+    DSPContext dsp;
 
     MDCTContext mdct0;
     MDCTContext mdct1;