comparison libao2/fir.h @ 3631:5f5189ac6a41

Added plugin for fractional resampling (alpha code)
author anders
date Thu, 20 Dec 2001 15:30:22 +0000
parents
children d6f8feeac656
comparison
equal deleted inserted replaced
3630:f24527fc1b79 3631:5f5189ac6a41
1 /*=============================================================================
2 //
3 // This file is part of mplayer.
4 //
5 // mplayer is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2 of the License, or
8 // (at your option) any later version.
9 //
10 // mplayer is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with mplayer; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 //
19 // Copyright 2001 Anders Johansson ajh@atri.curtin.edu.au
20 //
21 //=============================================================================
22 */
23
24 #ifndef __FIR_H__
25 #define __FIR_H__
26
27 /* 4, 8 and 16 tap FIR filters implemented using SSE instructions
28 int16_t* x Input data
29 int16_t* y Output value
30 int16_t* w Filter weights
31
32 C function
33 for(int i = 0 ; i < L ; i++)
34 *y += w[i]*x[i];
35 */
36
37 #ifdef HAVE_SSE
38
/*
 * 4-tap filter, SSE/MMX inline assembly version.
 *
 * The sample queue lives in %mm2 between macro invocations, so LOAD_QUE,
 * UPDATE_QUE, FIR and SAVE_QUE must be used together and nothing else may
 * touch %mm2 in between.
 *
 * Original author's note: this block should be MMX-only compatible, but it
 * isn't (presumably because of the pinsrw in UPDATE_QUE -- TODO confirm).
 *
 * Every macro expands to a complete statement including its trailing ';'.
 */
#ifdef L4

/* Load the 8 bytes starting at (x)[0] into the queue register %mm2. */
#define LOAD_QUE(x) \
  __asm __volatile("movq %0, %%mm2\n\t" \
                   : \
                   :"m"((x)[0]) \
                   :"memory");

/*
 * Store the queue register %mm2 back to the 8 bytes starting at (x)[0].
 * The argument is parenthesized so that expressions such as `buf + 4`
 * index correctly.
 */
#define SAVE_QUE(x) \
  __asm __volatile("movq %%mm2, %0\n\t" \
                   :"=m"((x)[0]) \
                   : \
                   :"memory");

/*
 * Push one sample into the queue: shift %mm2 left by one 16-bit word
 * (the oldest word falls off the top) and insert (in)[0] as word 0.
 */
#define UPDATE_QUE(in) \
  __asm __volatile("psllq $16, %%mm2\n\t" \
                   "pinsrw $0, %0,%%mm2\n\t" \
                   : \
                   :"m" ((in)[0]) \
                   :"memory");

/*
 * Multiply the four queued words in %mm2 by the four words at (w)[0],
 * add the products into one 32-bit sum and write bits 16..31 of that sum
 * to (y)[0].  The x argument is not used by this version; the samples are
 * taken from %mm2.  Scratch registers: %mm0, %mm1 and %esi.
 */
#define FIR(x,w,y) \
  __asm __volatile("movq %%mm2, %%mm0\n\t" \
                   "pmaddwd %1, %%mm0\n\t" \
                   "movq %%mm0, %%mm1\n\t" \
                   "psrlq $32, %%mm1\n\t" \
                   "paddd %%mm0, %%mm1\n\t" \
                   "movd %%mm1, %%esi\n\t" \
                   "shrl $16, %%esi\n\t" \
                   "movw %%si, %0\n\t" \
                   : "=m" ((y)[0]) \
                   : "m" ((w)[0]) \
                   : "memory", "%esi");
#endif /* L4 */
70
/*
 * 8-tap filter, SSE/MMX inline assembly version.
 *
 * The queue is split over two registers that persist between macro
 * invocations: %mm5 holds the 8 bytes loaded from (x)[0] and %mm4 the
 * 8 bytes loaded from (x)[4].
 *
 * Original author's note: the 8-tap filter could be made a lot faster by
 * using the 128 bit registers -- feel free to optimize.
 */
#ifdef L8

/* Fetch the queue from memory into %mm5 / %mm4. */
#define LOAD_QUE(x) \
  __asm__ __volatile__( \
    "movq %0, %%mm5\n\t" \
    "movq %1, %%mm4\n\t" \
    : /* no outputs */ \
    : "m" ((x)[0]), \
      "m" ((x)[4]) \
    : "memory");

/* Write the queue registers %mm5 / %mm4 back to memory. */
#define SAVE_QUE(x) \
  __asm__ __volatile__( \
    "movq %%mm5, %0\n\t" \
    "movq %%mm4, %1\n\t" \
    : "=m" ((x)[0]), \
      "=m" ((x)[4]) \
    : /* no inputs */ \
    : "memory");

/*
 * Push one sample into the 8-word queue.
 *
 * Original author's note: the operation
 *     "pextrw $3, %%mm5,%%eax\n\t"
 * could replace lines 2 to 5 of this macro, but is not used because of a
 * suspected compiler bug.
 */
#define UPDATE_QUE(in) \
  __asm__ __volatile__( \
    "psllq $16, %%mm4\n\t"        /* shift %mm4 up by one word          */ \
    "movq %%mm5, %%mm0\n\t"       /* scratch copy of %mm5               */ \
    "psrlq $48, %%mm0\n\t"        /* keep only its top word             */ \
    "movd %%mm0, %%eax\n\t" \
    "pinsrw $0, %%eax,%%mm4\n\t"  /* carry that word into %mm4 word 0   */ \
    "psllq $16, %%mm5\n\t"        /* shift %mm5 up by one word          */ \
    "pinsrw $0, %0,%%mm5\n\t"     /* new sample becomes %mm5 word 0     */ \
    : /* no outputs */ \
    : "m" ((in)[0]) \
    : "memory", "%eax");

/*
 * Multiply-accumulate the queue against the weights at (w)[0] and (w)[4],
 * fold everything into one 32-bit sum and store bits 16..31 of it in
 * (y)[0].  The x argument is not used by this version.
 * Scratch registers: %mm0, %mm1 and %esi.
 */
#define FIR(x,w,y) \
  __asm__ __volatile__( \
    "movq %%mm5, %%mm0\n\t" \
    "pmaddwd %1, %%mm0\n\t"       /* %mm5 words times (w)[0]...         */ \
    "movq %%mm4, %%mm1\n\t" \
    "pmaddwd %2, %%mm1\n\t"       /* %mm4 words times (w)[4]...         */ \
    "paddd %%mm1, %%mm0\n\t"      /* combine the two halves             */ \
    "movq %%mm0, %%mm1\n\t" \
    "psrlq $32, %%mm1\n\t" \
    "paddd %%mm0, %%mm1\n\t"      /* high dword + low dword             */ \
    "movd %%mm1, %%esi\n\t" \
    "shrl $16, %%esi\n\t" \
    "movw %%si, %0\n\t" \
    : "=m" ((y)[0]) \
    : "m" ((w)[0]), \
      "m" ((w)[4]) \
    : "memory", "%esi");

#endif /* L8 */
120
121 #else /* HAVE_SSE */
122
/*
 * Plain C fallback for the sample queue.
 *
 * LOAD_QUE and SAVE_QUE expand to nothing in this version.
 *
 * UPDATE_QUE(inm) relies on three names from the caller's scope: the
 * buffer `x`, the integer index `xi` and the constant `L`.  It steps `xi`
 * back by one, wrapping with the mask (L - 1) (this assumes L is a power
 * of two), then stores *(inm) at both x[xi] and x[xi + L].
 *
 * The index is computed as (xi - 1) rather than (--xi): assigning to xi
 * and decrementing it in the same expression is undefined behaviour in C.
 *
 * The trailing ';' is part of the macro, so it can be invoked with or
 * without a semicolon of its own.
 */
#define LOAD_QUE(x)
#define SAVE_QUE(x)
#define UPDATE_QUE(inm) \
  do { \
    xi = (xi - 1) & (L - 1); \
    x[xi] = x[xi + L] = *(inm); \
  } while (0);
128
/*
 * Plain C fallback for the FIR step:
 *     y[0] = (sum over i of w[i] * x[i]) >> 16
 * with 4 taps when L4 is defined and 8 taps otherwise.
 *
 * All products are summed first and the sum is shifted once.  Shifting
 * two partial sums separately would drop the carry between them and give
 * a result up to one LSB lower than the SSE version, which adds the
 * 32-bit sums before shifting.
 *
 * NOTE(review): the sum is computed in int; this assumes the weights are
 * scaled so that it fits in 32 bits -- confirm against the callers.
 *
 * The trailing ';' is part of the macro, so it can be invoked with or
 * without a semicolon of its own.
 */
#ifdef L4
#define FIR(x,w,y) \
  (y)[0] = ((w)[0]*(x)[0] + (w)[1]*(x)[1] + \
            (w)[2]*(x)[2] + (w)[3]*(x)[3]) >> 16;
#else
#define FIR(x,w,y) \
  (y)[0] = ((w)[0]*(x)[0] + (w)[1]*(x)[1] + \
            (w)[2]*(x)[2] + (w)[3]*(x)[3] + \
            (w)[4]*(x)[4] + (w)[5]*(x)[5] + \
            (w)[6]*(x)[6] + (w)[7]*(x)[7]) >> 16;
#endif /* L4 */
139
140 #endif /* HAVE_SSE */
141
142 #endif /* __FIR_H__ */
143
144