annotate libao2/fir.h @ 4535:99dc749591e2

Enable SSH optimizations for FIR filter
author anders
date Tue, 05 Feb 2002 04:52:41 +0000
parents 585f0c77d8f5
children 534ef9323eca
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3631
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
1 /*=============================================================================
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
2 //
4049
d6f8feeac656 Correction of lisensing comment
anders
parents: 3631
diff changeset
3 // This software has been released under the terms of the GNU Public
d6f8feeac656 Correction of lisensing comment
anders
parents: 3631
diff changeset
4 // license. See http://www.gnu.org/copyleft/gpl.html for details.
3631
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
5 //
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
6 // Copyright 2001 Anders Johansson ajh@atri.curtin.edu.au
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
7 //
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
8 //=============================================================================
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
9 */
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
10
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
11 #ifndef __FIR_H__
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
12 #define __FIR_H__
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
13
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
14 /* 4 and 8 tap FIR filters (SSE inline assembly with a plain C fallback; no 16 tap variant is defined here)
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
15 int16_t* x Input data
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
16 int16_t* y Output value
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
17 int16_t* w Filter weights
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
18
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
19 Reference C function (the FIR macros below additionally scale the sum by >> 16)
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
20 for(int i = 0 ; i < L ; i++)
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
21 *y += w[i]*x[i];
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
22 */
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
23
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
24 #ifdef HAVE_SSE
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
25
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
26 // This block should be MMX-only compatible, but it isn't: pinsrw requires SSE.
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
27 #ifdef L4
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
28 #define LOAD_QUE(x) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
29 __asm __volatile("movq %0, %%mm2\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
30 : \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
31 :"m"((x)[0]) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
32 :"memory");
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
33 #define SAVE_QUE(x) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
34 __asm __volatile("movq %%mm2, %0\n\t" \
4535
99dc749591e2 Enable SSH optimizations for FIR filter
anders
parents: 4171
diff changeset
35 "emms \n\t" \
3631
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
36 :"=m"(x[0]) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
37 : \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
38 :"memory");
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
39 #define UPDATE_QUE(in) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
40 __asm __volatile("psllq $16, %%mm2\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
41 "pinsrw $0, %0,%%mm2\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
42 : \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
43 :"m" ((in)[0]) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
44 :"memory");
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
45 #define FIR(x,w,y) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
46 __asm __volatile("movq %%mm2, %%mm0\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
47 "pmaddwd %1, %%mm0\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
48 "movq %%mm0, %%mm1\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
49 "psrlq $32, %%mm1\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
50 "paddd %%mm0, %%mm1\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
51 "movd %%mm1, %%esi\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
52 "shrl $16, %%esi\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
53 "movw %%si, %0\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
54 : "=m" ((y)[0]) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
55 : "m" ((w)[0]) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
56 : "memory", "%esi");
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
57 #endif /* L4 */
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
58
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
59 // It is possible to make the 8 tap filter a lot faster by using the
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
60 // 128 bit registers, feel free to optimize.
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
61 #ifdef L8
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
62 #define LOAD_QUE(x) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
63 __asm __volatile("movq %0, %%mm5\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
64 "movq %1, %%mm4\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
65 : \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
66 :"m"((x)[0]), \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
67 "m"((x)[4]) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
68 :"memory");
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
69 #define SAVE_QUE(x) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
70 __asm __volatile("movq %%mm5, %0\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
71 "movq %%mm4, %1\n\t" \
4535
99dc749591e2 Enable SSH optimizations for FIR filter
anders
parents: 4171
diff changeset
72 "emms \n\t" \
3631
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
73 :"=m"((x)[0]), \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
74 "=m"((x)[4]) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
75 : \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
76 :"memory");
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
77
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
78 // The operation below could replace line 2 to 5 in the macro below but can
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
79 // not because of a compiler bug ???
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
80 // "pextrw $3, %%mm5,%%eax\n\t"
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
81 #define UPDATE_QUE(in) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
82 __asm __volatile("psllq $16, %%mm4\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
83 "movq %%mm5, %%mm0\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
84 "psrlq $48, %%mm0\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
85 "movd %%mm0, %%eax\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
86 "pinsrw $0, %%eax,%%mm4\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
87 "psllq $16, %%mm5\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
88 "pinsrw $0, %0,%%mm5\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
89 : \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
90 :"m" ((in)[0]) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
91 :"memory", "%eax");
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
92 #define FIR(x,w,y) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
93 __asm __volatile("movq %%mm5, %%mm0\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
94 "pmaddwd %1, %%mm0\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
95 "movq %%mm4, %%mm1\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
96 "pmaddwd %2, %%mm1\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
97 "paddd %%mm1, %%mm0\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
98 "movq %%mm0, %%mm1\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
99 "psrlq $32, %%mm1\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
100 "paddd %%mm0, %%mm1\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
101 "movd %%mm1, %%esi\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
102 "shrl $16, %%esi\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
103 "movw %%si, %0\n\t" \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
104 : "=m" ((y)[0]) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
105 : "m" ((w)[0]), \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
106 "m" ((w)[4]) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
107 : "memory", "%esi");
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
108 #endif /* L8 */
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
109
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
110 #else /* HAVE_SSE */
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
111
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
112 #define LOAD_QUE(x)
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
113 #define SAVE_QUE(x)
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
114 #define UPDATE_QUE(inm) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
115 xi=(--xi)&(L-1); \
4171
585f0c77d8f5 Sync problem when using fractional resampling fixed + speed increased.
anders
parents: 4049
diff changeset
116 x[xi]=x[xi+L]=*(inm);
3631
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
117
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
118 #ifdef L4
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
119 #define FIR(x,w,y) \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
120 y[0]=(w[0]*x[0]+w[1]*x[1]+w[2]*x[2]+w[3]*x[3]) >> 16;
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
121 #else
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
122 #define FIR(x,w,y){ \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
123 int16_t a = (w[0]*x[0]+w[1]*x[1]+w[2]*x[2]+w[3]*x[3]) >> 16; \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
124 int16_t b = (w[4]*x[4]+w[5]*x[5]+w[6]*x[6]+w[7]*x[7]) >> 16; \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
125 y[0] = a+b; \
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
126 }
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
127 #endif /* L4 */
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
128
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
129 #endif /* HAVE_SSE */
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
130
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
131 #endif /* __FIR_H__ */
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
132
5f5189ac6a41 Added plugin for fractional resampling (alpha code)
anders
parents:
diff changeset
133