diff libao2/fir.h @ 3631:5f5189ac6a41

Added plugin for fractional resampling (alpha code)
author anders
date Thu, 20 Dec 2001 15:30:22 +0000
parents
children d6f8feeac656
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libao2/fir.h	Thu Dec 20 15:30:22 2001 +0000
@@ -0,0 +1,144 @@
+/*=============================================================================
+//	
+//  This file is part of mplayer.
+//
+//  mplayer is free software; you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation; either version 2 of the License, or
+//  (at your option) any later version.
+//
+//  mplayer is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//  GNU General Public License for more details.
+//
+//  You should have received a copy of the GNU General Public License
+//  along with mplayer; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+//  Copyright 2001 Anders Johansson ajh@atri.curtin.edu.au
+//
+//=============================================================================
+*/
+
+#ifndef __FIR_H__
+#define __FIR_H__
+
+/* 4, 8 and 16 tap FIR filters implemented using SSE instructions 
+   int16_t* x Input data
+   int16_t* y Output value
+   int16_t* w Filter weights 
+   
+   C function
+   for(int i = 0 ; i < L ; i++)
+     *y += w[i]*x[i];
+*/
+
+#ifdef HAVE_SSE
+
+// This block should be MMX only compatible, but it isn't...
+#ifdef L4
+#define LOAD_QUE(x) \
+        __asm __volatile("movq %0, %%mm2\n\t" \
+                         :                    \
+                         :"m"((x)[0])         \
+                         :"memory");
+#define SAVE_QUE(x) \
+        __asm __volatile("movq %%mm2, %0\n\t" \
+                         :"=m"(x[0])          \
+                         :                    \
+                         :"memory");
+#define UPDATE_QUE(in) \
+        __asm __volatile("psllq   $16,   %%mm2\n\t"    \
+                         "pinsrw  $0,    %0,%%mm2\n\t" \
+                          :                            \
+                          :"m" ((in)[0])               \
+                          :"memory");                  
+#define FIR(x,w,y) \
+        __asm __volatile("movq	  %%mm2, %%mm0\n\t" \
+                         "pmaddwd %1,    %%mm0\n\t" \
+                         "movq    %%mm0, %%mm1\n\t" \
+                         "psrlq   $32, 	 %%mm1\n\t" \
+                         "paddd   %%mm0, %%mm1\n\t" \
+                         "movd    %%mm1, %%esi\n\t" \
+                         "shrl    $16,   %%esi\n\t" \
+                         "movw    %%si,  %0\n\t"    \
+			 : "=m" ((y)[0])            \
+			 : "m" ((w)[0])             \
+			 : "memory", "%esi"); 
+#endif /* L4 */
+
+// It is possible to make the 8 bit filter a lot faster by using the
+// 128 bit registers, feel free to optimize.
+#ifdef L8
+#define LOAD_QUE(x) \
+        __asm __volatile("movq %0, %%mm5\n\t" \
+                         "movq %1, %%mm4\n\t" \
+                         :                    \
+                         :"m"((x)[0]),        \
+                          "m"((x)[4])         \
+                         :"memory");
+#define SAVE_QUE(x) \
+        __asm __volatile("movq %%mm5, %0\n\t" \
+                         "movq %%mm4, %1\n\t" \
+                         :"=m"((x)[0]),       \
+                          "=m"((x)[4])        \
+                         :                    \
+                         :"memory");
+
+// Below operation could replace line 2 to 5 in macro below but can
+// not cause of compiler bug ???
+// "pextrw $3, %%mm5,%%eax\n\t"
+#define UPDATE_QUE(in) \
+        __asm __volatile("psllq    $16,   %%mm4\n\t"        \
+                         "movq	   %%mm5, %%mm0\n\t" 	    \
+                         "psrlq    $48,   %%mm0\n\t"        \
+                         "movd     %%mm0, %%eax\n\t"        \
+			 "pinsrw   $0,    %%eax,%%mm4\n\t"  \
+                         "psllq    $16,   %%mm5\n\t"        \
+                         "pinsrw   $0,    %0,%%mm5\n\t"     \
+                          :                                 \
+                          :"m" ((in)[0])                    \
+                          :"memory", "%eax");                  
+#define FIR(x,w,y) \
+        __asm __volatile("movq	  %%mm5, %%mm0\n\t" \
+                         "pmaddwd %1,    %%mm0\n\t" \
+                         "movq	  %%mm4, %%mm1\n\t" \
+                         "pmaddwd %2,    %%mm1\n\t" \
+                         "paddd   %%mm1, %%mm0\n\t" \
+                         "movq    %%mm0, %%mm1\n\t" \
+                         "psrlq   $32, 	 %%mm1\n\t" \
+                         "paddd   %%mm0, %%mm1\n\t" \
+                         "movd    %%mm1, %%esi\n\t" \
+                         "shrl    $16,   %%esi\n\t" \
+                         "movw    %%si,  %0\n\t"    \
+			 : "=m" ((y)[0])            \
+			 : "m" ((w)[0]),            \
+			   "m" ((w)[4])             \
+			 : "memory", "%esi"); 
+#endif /* L8 */
+
+#else /* HAVE_SSE */
+
+#define LOAD_QUE(x)
+#define SAVE_QUE(x)
+#define UPDATE_QUE(inm) \
+  xi=(--xi)&(L-1);     \
+  x[xi]=x[xi+L]=*inm;
+
+#ifdef L4
+#define FIR(x,w,y) \
+        y[0]=(w[0]*x[0]+w[1]*x[1]+w[2]*x[2]+w[3]*x[3]) >> 16;
+#else
+#define FIR(x,w,y){ \
+  int16_t a = (w[0]*x[0]+w[1]*x[1]+w[2]*x[2]+w[3]*x[3]) >> 16; \
+  int16_t b = (w[4]*x[4]+w[5]*x[5]+w[6]*x[6]+w[7]*x[7]) >> 16; \
+  y[0]      = a+b; \
+}
+#endif /* L4 */
+
+#endif /* HAVE_SSE */
+
+#endif /* __FIR_H__ */
+
+