Mercurial > mplayer.hg
view libao2/fir.h @ 3740:dcbdad3fc5cd
mix31to2 & mix31toS in SSE
author | michael |
---|---|
date | Tue, 25 Dec 2001 17:36:54 +0000 |
parents | 5f5189ac6a41 |
children | d6f8feeac656 |
line wrap: on
line source
/*=============================================================================
//
//  This file is part of mplayer.
//
//  mplayer is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//
//  mplayer is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with mplayer; if not, write to the Free Software
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
//  Copyright 2001 Anders Johansson ajh@atri.curtin.edu.au
//
//=============================================================================
*/

#ifndef __FIR_H__
#define __FIR_H__

/*
 * 4 and 8 tap FIR filters on 16 bit samples, as function-like macros.
 *
 *   int16_t* x   Input data (sample queue)
 *   int16_t* y   Output value
 *   int16_t* w   Filter weights
 *
 * Every FIR() variant in this file stores bits 16..31 of the sum of
 * products in y[0], i.e. the equivalent of
 *
 *   acc = 0;
 *   for (i = 0; i < taps; i++)
 *     acc += w[i] * x[i];
 *   y[0] = acc >> 16;
 *
 * Macros provided:
 *   LOAD_QUE(x)    load the sample queue from memory (no-op without SSE)
 *   SAVE_QUE(x)    store the sample queue to memory  (no-op without SSE)
 *   UPDATE_QUE(in) push the sample in[0] into the queue
 *   FIR(x,w,y)     run the filter and write one output sample to y[0]
 *
 * Define L4 before including this file to get the 4 tap variant.  With
 * HAVE_SSE the 8 tap variant is only provided when L8 is defined; without
 * HAVE_SSE it is what you get whenever L4 is not defined.
 *
 * The SSE macros keep their trailing ';' for compatibility with existing
 * call sites; the C fallback macros expand to a brace block, so they work
 * with or without a trailing ';' at the call site.
 */

#ifdef HAVE_SSE
/*
 * This block should be MMX only compatible, but it isn't: pinsrw on an MMX
 * register is not part of plain MMX.
 *
 * NOTE(review): the sample queue lives in MMX registers (mm2 for L4,
 * mm5/mm4 for L8) across separate asm statements, and none of the mm
 * registers used appear in the clobber lists.  Nothing visible here stops
 * the compiler, or other MMX/x87 code between two macro invocations, from
 * changing those registers -- confirm the callers before relying on this.
 */

#ifdef L4

/* Load the four samples x[0..3] into the queue register mm2. */
#define LOAD_QUE(x) \
        __asm __volatile("movq %0, %%mm2\n\t" \
                         : \
                         :"m"((x)[0]) \
                         :"memory");

/* Store the queue register mm2 to x[0..3]. */
#define SAVE_QUE(x) \
        __asm __volatile("movq %%mm2, %0\n\t" \
                         :"=m"((x)[0]) \
                         : \
                         :"memory");

/* Shift the queue left by one 16 bit word and put in[0] into word 0. */
#define UPDATE_QUE(in) \
        __asm __volatile("psllq $16, %%mm2\n\t" \
                         "pinsrw $0, %0,%%mm2\n\t" \
                         : \
                         :"m" ((in)[0]) \
                         :"memory");

/*
 * Multiply the queue (mm2) by w[0..3], add the two 32 bit partial sums,
 * shift the total right by 16 and store the low 16 bits in y[0].
 * The x argument is not used; the samples come from mm2.
 */
#define FIR(x,w,y) \
        __asm __volatile("movq %%mm2, %%mm0\n\t" \
                         "pmaddwd %1, %%mm0\n\t" \
                         "movq %%mm0, %%mm1\n\t" \
                         "psrlq $32, %%mm1\n\t" \
                         "paddd %%mm0, %%mm1\n\t" \
                         "movd %%mm1, %%esi\n\t" \
                         "shrl $16, %%esi\n\t" \
                         "movw %%si, %0\n\t" \
                         : "=m" ((y)[0]) \
                         : "m" ((w)[0]) \
                         : "memory", "%esi");

#endif /* L4 */

// It is possible to make the 8 tap filter a lot faster by using the
// 128 bit registers, feel free to optimize.
#ifdef L8

/* Load x[0..3] into mm5 and x[4..7] into mm4. */
#define LOAD_QUE(x) \
        __asm __volatile("movq %0, %%mm5\n\t" \
                         "movq %1, %%mm4\n\t" \
                         : \
                         :"m"((x)[0]), \
                          "m"((x)[4]) \
                         :"memory");

/* Store mm5 to x[0..3] and mm4 to x[4..7]. */
#define SAVE_QUE(x) \
        __asm __volatile("movq %%mm5, %0\n\t" \
                         "movq %%mm4, %1\n\t" \
                         :"=m"((x)[0]), \
                          "=m"((x)[4]) \
                         : \
                         :"memory");

/*
 * Shift the 8 word queue left by one word: the top word of mm5 becomes
 * word 0 of mm4, and in[0] becomes word 0 of mm5.
 *
 * The operation below could replace instructions 2 to 4 of the macro
 * (the copy of the top word of mm5 into eax), but it is not used because
 * of a suspected compiler bug:
 *   "pextrw $3, %%mm5,%%eax\n\t"
 */
#define UPDATE_QUE(in) \
        __asm __volatile("psllq $16, %%mm4\n\t" \
                         "movq %%mm5, %%mm0\n\t" \
                         "psrlq $48, %%mm0\n\t" \
                         "movd %%mm0, %%eax\n\t" \
                         "pinsrw $0, %%eax,%%mm4\n\t" \
                         "psllq $16, %%mm5\n\t" \
                         "pinsrw $0, %0,%%mm5\n\t" \
                         : \
                         :"m" ((in)[0]) \
                         :"memory", "%eax");

/*
 * Multiply mm5 by w[0..3] and mm4 by w[4..7], add all partial sums,
 * shift the total right by 16 and store the low 16 bits in y[0].
 * The x argument is not used; the samples come from mm5/mm4.
 */
#define FIR(x,w,y) \
        __asm __volatile("movq %%mm5, %%mm0\n\t" \
                         "pmaddwd %1, %%mm0\n\t" \
                         "movq %%mm4, %%mm1\n\t" \
                         "pmaddwd %2, %%mm1\n\t" \
                         "paddd %%mm1, %%mm0\n\t" \
                         "movq %%mm0, %%mm1\n\t" \
                         "psrlq $32, %%mm1\n\t" \
                         "paddd %%mm0, %%mm1\n\t" \
                         "movd %%mm1, %%esi\n\t" \
                         "shrl $16, %%esi\n\t" \
                         "movw %%si, %0\n\t" \
                         : "=m" ((y)[0]) \
                         : "m" ((w)[0]), \
                           "m" ((w)[4]) \
                         : "memory", "%esi");

#endif /* L8 */

#else /* HAVE_SSE */

/* The C fallback keeps the queue in memory, so there is nothing to load
   or save. */
#define LOAD_QUE(x)
#define SAVE_QUE(x)

/*
 * Push the sample *inm into the queue.
 *
 * Uses three names from the expansion site: the queue array `x`, its index
 * `xi`, and the queue length `L`.  The sample is written to both x[xi] and
 * x[xi+L].  L is used as a bit mask, so it is presumably a power of two,
 * and x presumably holds at least 2*L elements -- verify against the caller.
 *
 * The index is stepped with (xi - 1) rather than (--xi) so that xi is
 * modified only once per expression.
 */
#define UPDATE_QUE(inm) \
  { \
    xi = (xi - 1) & (L - 1); \
    x[xi] = x[xi + L] = *(inm); \
  }

/*
 * The products are accumulated in long long so that the sum cannot overflow,
 * and all taps are summed before the single shift by 16.  The value stored
 * is therefore bits 16..31 of the total, which is what the SSE variants
 * store as well.
 */
#ifdef L4
#define FIR(x,w,y) \
  { \
    (y)[0] = (int16_t)(((long long)(w)[0] * (x)[0] + \
                        (long long)(w)[1] * (x)[1] + \
                        (long long)(w)[2] * (x)[2] + \
                        (long long)(w)[3] * (x)[3]) >> 16); \
  }
#else
#define FIR(x,w,y) \
  { \
    (y)[0] = (int16_t)(((long long)(w)[0] * (x)[0] + \
                        (long long)(w)[1] * (x)[1] + \
                        (long long)(w)[2] * (x)[2] + \
                        (long long)(w)[3] * (x)[3] + \
                        (long long)(w)[4] * (x)[4] + \
                        (long long)(w)[5] * (x)[5] + \
                        (long long)(w)[6] * (x)[6] + \
                        (long long)(w)[7] * (x)[7]) >> 16); \
  }
#endif /* L4 */

#endif /* HAVE_SSE */

#endif /* __FIR_H__ */