/*=============================================================================
//
// This software has been released under the terms of the GNU Public
// license. See http://www.gnu.org/copyleft/gpl.html for details.
//
// Copyright 2001 Anders Johansson ajh@atri.curtin.edu.au
//
//=============================================================================
*/
|
|
10
|
|
11 #ifndef __FIR_H__
|
|
12 #define __FIR_H__
|
|
13
|
|
/* 4, 8 and 16 tap FIR filters implemented using SSE instructions
   (only the 4 tap, L4, and 8 tap, L8, variants are defined below).

   int16_t* x  Input data
   int16_t* y  Output value
   int16_t* w  Filter weights

   Equivalent C function:
     for(int i = 0 ; i < L ; i++)
       *y += w[i]*x[i];

   The implementations below additionally scale the accumulated sum by
   >> 16 before storing it in y[0].
*/
|
|
23
|
|
24 #ifdef HAVE_SSE
|
|
25
|
|
// This block should be MMX-only compatible, but it isn't: UPDATE_QUE uses
// pinsrw, which was introduced with SSE.
|
|
27 #ifdef L4
|
|
/* LOAD_QUE(x): load the 4-sample queue x[0..3] (64 bits) into %mm2.
   The L4 UPDATE_QUE, FIR and SAVE_QUE macros all operate on %mm2, so the
   register must stay untouched between LOAD_QUE and SAVE_QUE.
   The expansion already ends in ';'.
   NOTE(review): no emms is issued by these macros; presumably the caller
   clears the MMX state before any x87 code runs -- confirm. */
#define LOAD_QUE(x) \
  __asm__ __volatile__("movq %0, %%mm2\n\t" \
                       : \
                       : "m"((x)[0]) \
                       : "memory");
|
|
/* SAVE_QUE(x): store the 4-sample queue held in %mm2 back to x[0..3]
   (one 64-bit movq).  The argument is parenthesized so that pointer
   expressions such as `buf + 1` index correctly.
   The expansion already ends in ';'. */
#define SAVE_QUE(x) \
  __asm__ __volatile__("movq %%mm2, %0\n\t" \
                       : "=m"((x)[0]) \
                       : \
                       : "memory");
|
|
/* UPDATE_QUE(in): push the sample in[0] into the 4-sample queue in %mm2.
   The queue is shifted up by one 16-bit word (the word in position 3 is
   dropped) and the new sample becomes word 0.
   The expansion already ends in ';'. */
#define UPDATE_QUE(in) \
  __asm__ __volatile__("psllq $16, %%mm2\n\t"     /* make room in word 0 */ \
                       "pinsrw $0, %0,%%mm2\n\t"  /* word 0 = in[0]      */ \
                       : \
                       : "m"((in)[0]) \
                       : "memory");
|
|
/* FIR(x,w,y): 4 tap filter step on the queue held in %mm2.
     w  four filter weights, read from w[0..3]
     y  receives bits 16..31 of the 32-bit sum of products in y[0]
     x  unused here; the samples come from %mm2 (see LOAD_QUE/UPDATE_QUE)
   %mm0, %mm1 and %esi are used as scratch; only %esi is declared as
   clobbered.  The expansion already ends in ';'. */
#define FIR(x,w,y) \
  __asm__ __volatile__("movq %%mm2, %%mm0\n\t"   /* work on a copy of the queue  */ \
                       "pmaddwd %1, %%mm0\n\t"    /* two 32-bit pair sums         */ \
                       "movq %%mm0, %%mm1\n\t" \
                       "psrlq $32, %%mm1\n\t"     /* high pair sum -> low dword   */ \
                       "paddd %%mm0, %%mm1\n\t"   /* low dword = total sum        */ \
                       "movd %%mm1, %%esi\n\t" \
                       "shrl $16, %%esi\n\t"      /* scale: keep bits 16..31      */ \
                       "movw %%si, %0\n\t" \
                       : "=m"((y)[0]) \
                       : "m"((w)[0]) \
                       : "memory", "%esi");
|
|
56 #endif /* L4 */
|
|
57
|
|
// It is possible to make the 8 tap filter a lot faster by using the
// 128 bit (XMM) registers; feel free to optimize.
|
|
60 #ifdef L8
|
|
/* LOAD_QUE(x): load the 8-sample queue into MMX registers:
     %mm5 <- x[0..3],  %mm4 <- x[4..7]
   The L8 UPDATE_QUE, FIR and SAVE_QUE macros operate on these two
   registers, so they must stay untouched between LOAD_QUE and SAVE_QUE.
   The expansion already ends in ';'.
   NOTE(review): no emms is issued by these macros; presumably the caller
   clears the MMX state before any x87 code runs -- confirm. */
#define LOAD_QUE(x) \
  __asm__ __volatile__("movq %0, %%mm5\n\t" \
                       "movq %1, %%mm4\n\t" \
                       : \
                       : "m"((x)[0]), \
                         "m"((x)[4]) \
                       : "memory");
|
|
/* SAVE_QUE(x): store the 8-sample queue back to memory:
     x[0..3] <- %mm5,  x[4..7] <- %mm4
   The expansion already ends in ';'. */
#define SAVE_QUE(x) \
  __asm__ __volatile__("movq %%mm5, %0\n\t" \
                       "movq %%mm4, %1\n\t" \
                       : "=m"((x)[0]), \
                         "=m"((x)[4]) \
                       : \
                       : "memory");
|
|
75
|
|
/* UPDATE_QUE(in): push the sample in[0] into the 8-sample queue held in
   %mm5 (samples 0..3) and %mm4 (samples 4..7).
   Every sample moves up one position: word 3 of %mm5 is carried over into
   word 0 of %mm4, the old word 3 of %mm4 is dropped, and in[0] becomes
   word 0 of %mm5.  %mm0 and %eax are used as scratch (only %eax is
   declared as clobbered).  The expansion already ends in ';'.

   The movq/psrlq/movd sequence that extracts word 3 of %mm5 could be
   replaced by the single instruction below, but according to the original
   author that cannot be done because of a compiler bug (???):
     "pextrw $3, %%mm5,%%eax\n\t" */
#define UPDATE_QUE(in) \
  __asm__ __volatile__("psllq $16, %%mm4\n\t"        /* make room in upper half   */ \
                       "movq %%mm5, %%mm0\n\t" \
                       "psrlq $48, %%mm0\n\t"        /* word 3 of %mm5 -> word 0  */ \
                       "movd %%mm0, %%eax\n\t" \
                       "pinsrw $0, %%eax,%%mm4\n\t"  /* carry it into upper half  */ \
                       "psllq $16, %%mm5\n\t"        /* make room in lower half   */ \
                       "pinsrw $0, %0,%%mm5\n\t"     /* word 0 = in[0]            */ \
                       : \
                       : "m"((in)[0]) \
                       : "memory", "%eax");
|
|
/* FIR(x,w,y): 8 tap filter step on the queue held in %mm5/%mm4.
     w  eight filter weights; w[0..3] pair with %mm5, w[4..7] with %mm4
     y  receives bits 16..31 of the 32-bit sum of products in y[0]
     x  unused here; the samples come from %mm5/%mm4
   %mm0, %mm1 and %esi are used as scratch; only %esi is declared as
   clobbered.  The expansion already ends in ';'. */
#define FIR(x,w,y) \
  __asm__ __volatile__("movq %%mm5, %%mm0\n\t" \
                       "pmaddwd %1, %%mm0\n\t"    /* pair sums, taps 0..3         */ \
                       "movq %%mm4, %%mm1\n\t" \
                       "pmaddwd %2, %%mm1\n\t"    /* pair sums, taps 4..7         */ \
                       "paddd %%mm1, %%mm0\n\t"   /* combine both halves          */ \
                       "movq %%mm0, %%mm1\n\t" \
                       "psrlq $32, %%mm1\n\t"     /* high dword -> low dword      */ \
                       "paddd %%mm0, %%mm1\n\t"   /* low dword = total sum        */ \
                       "movd %%mm1, %%esi\n\t" \
                       "shrl $16, %%esi\n\t"      /* scale: keep bits 16..31      */ \
                       "movw %%si, %0\n\t" \
                       : "=m"((y)[0]) \
                       : "m"((w)[0]), \
                         "m"((w)[4]) \
                       : "memory", "%esi");
|
|
106 #endif /* L8 */
|
|
107
|
|
108 #else /* HAVE_SSE */
|
|
109
|
|
/* Portable fallback: the queue lives in the caller's array, so there is
   nothing to load into or save from registers.  Both macros expand to
   nothing. */
#define LOAD_QUE(x)
#define SAVE_QUE(x)
|
|
/* UPDATE_QUE(inm): push the sample *inm into the caller's queue.
   Uses three names from the expansion site: the index xi, the array x and
   the length L.  xi is stepped back by one and masked with L-1, then the
   sample is written to both x[xi] and x[xi+L], so x needs at least 2*L
   elements.
   NOTE(review): the mask only acts as "modulo L" when L is a power of two;
   the duplicate store presumably lets a reader take L consecutive samples
   from x[xi] without wrapping -- confirm against the caller.

   The index is computed as (xi - 1) rather than (--xi): assigning the
   result of --xi back to xi modifies xi twice without sequencing, which is
   undefined behaviour.  The whole update is a single expression statement
   (already terminated by ';'), so it is safe under an unbraced if. */
#define UPDATE_QUE(inm) \
  (xi = (xi - 1) & (L - 1), x[xi] = x[xi + L] = *(inm));
|
|
115
|
|
116 #ifdef L4
|
|
/* FIR(x,w,y): portable 4 tap filter step.
     x  four input samples, x[0..3]
     w  four filter weights, w[0..3]
     y  receives the sum of products, scaled by >> 16, in y[0]
   Arguments are parenthesized so pointer expressions (e.g. `buf + 1`) can
   be passed directly.  The sum is accumulated in long long because four
   16x16-bit products can exceed the range of a 32-bit int.
   The expansion already ends in ';'. */
#define FIR(x,w,y) \
  (y)[0] = (((long long)(w)[0]*(x)[0] + (long long)(w)[1]*(x)[1] + \
             (long long)(w)[2]*(x)[2] + (long long)(w)[3]*(x)[3]) >> 16);
|
|
119 #else
|
|
/* FIR(x,w,y): portable 8 tap filter step.
     x  eight input samples, x[0..7]
     w  eight filter weights, w[0..7]
     y  receives the sum of products, scaled by >> 16, in y[0]
   All eight products are accumulated before the single >> 16.  Shifting
   the two halves separately and adding afterwards drops the carry out of
   the low 16 bits, so the result could be one LSB lower than the SSE
   variant of this macro, which sums first and shifts once.
   The accumulator is long long because eight 16x16-bit products can exceed
   the range of a 32-bit int.  Arguments are parenthesized so pointer
   expressions can be passed directly.  The expansion is a braced block
   without a trailing ';'. */
#define FIR(x,w,y){ \
  long long fir_sum_ = \
      (long long)(w)[0]*(x)[0] + (long long)(w)[1]*(x)[1] + \
      (long long)(w)[2]*(x)[2] + (long long)(w)[3]*(x)[3] + \
      (long long)(w)[4]*(x)[4] + (long long)(w)[5]*(x)[5] + \
      (long long)(w)[6]*(x)[6] + (long long)(w)[7]*(x)[7]; \
  (y)[0] = (int16_t)(fir_sum_ >> 16); \
}
|
|
125 #endif /* L4 */
|
|
126
|
|
127 #endif /* HAVE_SSE */
|
|
128
|
|
129 #endif /* __FIR_H__ */
|
|
130
|
|
131
|