mplayer.hg: libao2/fir.h comparison

comparison libao2/fir.h @ 3631:5f5189ac6a41

Added plugin for fractional resampling (alpha code)

author	anders
date	Thu, 20 Dec 2001 15:30:22 +0000
parents
children	d6f8feeac656

comparison

equal deleted inserted replaced

-:f24527fc1b79
+:5f5189ac6a41
+/*=============================================================================
+//
+//  This file is part of mplayer.
+//
+//  mplayer is free software; you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation; either version 2 of the License, or
+//  (at your option) any later version.
+//
+//  mplayer is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//  GNU General Public License for more details.
+//
+//  You should have received a copy of the GNU General Public License
+//  along with mplayer; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+//  Copyright 2001 Anders Johansson ajh@atri.curtin.edu.au
+//
+//=============================================================================
+*/
+#ifndef __FIR_H__
+#define __FIR_H__
+/* 4, 8 and 16 tap FIR filters implemented using SSE instructions
+int16_t* x Input data
+int16_t* y Output value
+int16_t* w Filter weights
+C function
+for(int i = 0 ; i < L ; i++)
+*y += w[i]*x[i];
+*/
+#ifdef HAVE_SSE
+// This block should be MMX only compatible, but it isn't...
+#ifdef L4
+#define LOAD_QUE(x) \
+__asm __volatile("movq %0, %%mm2\n\t" \
+:                    \
+:"m"((x)[0])         \
+:"memory");
+#define SAVE_QUE(x) \
+__asm __volatile("movq %%mm2, %0\n\t" \
+:"=m"(x[0])          \
+:                    \
+:"memory");
+#define UPDATE_QUE(in) \
+__asm __volatile("psllq   $16,   %%mm2\n\t"    \
+"pinsrw  $0,    %0,%%mm2\n\t" \
+:                            \
+:"m" ((in)[0])               \
+:"memory");
+#define FIR(x,w,y) \
+__asm __volatile("movq	  %%mm2, %%mm0\n\t" \
+"pmaddwd %1,    %%mm0\n\t" \
+"movq    %%mm0, %%mm1\n\t" \
+"psrlq   $32, 	 %%mm1\n\t" \
+"paddd   %%mm0, %%mm1\n\t" \
+"movd    %%mm1, %%esi\n\t" \
+"shrl    $16,   %%esi\n\t" \
+"movw    %%si,  %0\n\t"    \
+			 : "=m" ((y)[0])            \
+			 : "m" ((w)[0])             \
+			 : "memory", "%esi");
+#endif /* L4 */
+// It is possible to make the 8 bit filter a lot faster by using the
+// 128 bit registers, feel free to optimize.
+#ifdef L8
+#define LOAD_QUE(x) \
+__asm __volatile("movq %0, %%mm5\n\t" \
+"movq %1, %%mm4\n\t" \
+:                    \
+:"m"((x)[0]),        \
+"m"((x)[4])         \
+:"memory");
+#define SAVE_QUE(x) \
+__asm __volatile("movq %%mm5, %0\n\t" \
+"movq %%mm4, %1\n\t" \
+:"=m"((x)[0]),       \
+"=m"((x)[4])        \
+:                    \
+:"memory");
+// Below operation could replace line 2 to 5 in macro below but can
+// not cause of compiler bug ???
+// "pextrw $3, %%mm5,%%eax\n\t"
+#define UPDATE_QUE(in) \
+__asm __volatile("psllq    $16,   %%mm4\n\t"        \
+"movq	   %%mm5, %%mm0\n\t" 	    \
+"psrlq    $48,   %%mm0\n\t"        \
+"movd     %%mm0, %%eax\n\t"        \
+			 "pinsrw   $0,    %%eax,%%mm4\n\t"  \
+"psllq    $16,   %%mm5\n\t"        \
+"pinsrw   $0,    %0,%%mm5\n\t"     \
+:                                 \
+:"m" ((in)[0])                    \
+:"memory", "%eax");
+#define FIR(x,w,y) \
+__asm __volatile("movq	  %%mm5, %%mm0\n\t" \
+"pmaddwd %1,    %%mm0\n\t" \
+"movq	  %%mm4, %%mm1\n\t" \
+"pmaddwd %2,    %%mm1\n\t" \
+"paddd   %%mm1, %%mm0\n\t" \
+"movq    %%mm0, %%mm1\n\t" \
+"psrlq   $32, 	 %%mm1\n\t" \
+"paddd   %%mm0, %%mm1\n\t" \
+"movd    %%mm1, %%esi\n\t" \
+"shrl    $16,   %%esi\n\t" \
+"movw    %%si,  %0\n\t"    \
+			 : "=m" ((y)[0])            \
+			 : "m" ((w)[0]),            \
+			   "m" ((w)[4])             \
+			 : "memory", "%esi");
+#endif /* L8 */
+#else /* HAVE_SSE */
+#define LOAD_QUE(x)
+#define SAVE_QUE(x)
+#define UPDATE_QUE(inm) \
+xi=(--xi)&(L-1);     \
+x[xi]=x[xi+L]=*inm;
+#ifdef L4
+#define FIR(x,w,y) \
+y[0]=(w[0]*x[0]+w[1]*x[1]+w[2]*x[2]+w[3]*x[3]) >> 16;
+#else
+#define FIR(x,w,y){ \
+int16_t a = (w[0]*x[0]+w[1]*x[1]+w[2]*x[2]+w[3]*x[3]) >> 16; \
+int16_t b = (w[4]*x[4]+w[5]*x[5]+w[6]*x[6]+w[7]*x[7]) >> 16; \
+y[0]      = a+b; \
+}
+#endif /* L4 */
+#endif /* HAVE_SSE */
+#endif /* __FIR_H__ */

Mercurial > mplayer.hg

comparison libao2/fir.h @ 3631:5f5189ac6a41