Mercurial > libavcodec.hg
comparison celp_filters.c @ 10689:d124d9b688d0 libavcodec
Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
author | vitor |
---|---|
date | Wed, 16 Dec 2009 17:09:33 +0000 |
parents | 8d536f190e6e |
children | 63451af5f8f9 |
comparison
Legend (row states in the table below): equal, deleted, inserted, replaced
10688:750102456f00 | 10689:d124d9b688d0 |
---|---|
91 int buffer_length, | 91 int buffer_length, |
92 int filter_length) | 92 int filter_length) |
93 { | 93 { |
94 int i,n; | 94 int i,n; |
95 | 95 |
96 for (n = 0; n < buffer_length; n++) { | 96 float out0, out1, out2, out3; |
97 float old_out0, old_out1, old_out2, old_out3; | |
98 float a,b,c; | |
99 | |
100 a = filter_coeffs[0]; | |
101 b = filter_coeffs[1]; | |
102 c = filter_coeffs[2]; | |
103 b -= filter_coeffs[0] * filter_coeffs[0]; | |
104 c -= filter_coeffs[1] * filter_coeffs[0]; | |
105 c -= filter_coeffs[0] * b; | |
106 | |
107 old_out0 = out[-4]; | |
108 old_out1 = out[-3]; | |
109 old_out2 = out[-2]; | |
110 old_out3 = out[-1]; | |
111 for (n = 0; n <= buffer_length - 4; n+=4) { | |
112 float tmp0,tmp1,tmp2,tmp3; | |
113 float val; | |
114 | |
115 out0 = in[0]; | |
116 out1 = in[1]; | |
117 out2 = in[2]; | |
118 out3 = in[3]; | |
119 | |
120 out0 -= filter_coeffs[2] * old_out1; | |
121 out1 -= filter_coeffs[2] * old_out2; | |
122 out2 -= filter_coeffs[2] * old_out3; | |
123 | |
124 out0 -= filter_coeffs[1] * old_out2; | |
125 out1 -= filter_coeffs[1] * old_out3; | |
126 | |
127 out0 -= filter_coeffs[0] * old_out3; | |
128 | |
129 val = filter_coeffs[3]; | |
130 | |
131 out0 -= val * old_out0; | |
132 out1 -= val * old_out1; | |
133 out2 -= val * old_out2; | |
134 out3 -= val * old_out3; | |
135 | |
136 old_out3 = out[-5]; | |
137 | |
138 for (i = 5; i <= filter_length; i += 2) { | |
139 val = filter_coeffs[i-1]; | |
140 | |
141 out0 -= val * old_out3; | |
142 out1 -= val * old_out0; | |
143 out2 -= val * old_out1; | |
144 out3 -= val * old_out2; | |
145 | |
146 old_out2 = out[-i-1]; | |
147 | |
148 val = filter_coeffs[i]; | |
149 | |
150 out0 -= val * old_out2; | |
151 out1 -= val * old_out3; | |
152 out2 -= val * old_out0; | |
153 out3 -= val * old_out1; | |
154 | |
155 FFSWAP(float, old_out0, old_out2); | |
156 old_out1 = old_out3; | |
157 old_out3 = out[-i-2]; | |
158 } | |
159 | |
160 tmp0 = out0; | |
161 tmp1 = out1; | |
162 tmp2 = out2; | |
163 tmp3 = out3; | |
164 | |
165 out3 -= a * tmp2; | |
166 out2 -= a * tmp1; | |
167 out1 -= a * tmp0; | |
168 | |
169 out3 -= b * tmp1; | |
170 out2 -= b * tmp0; | |
171 | |
172 out3 -= c * tmp0; | |
173 | |
174 | |
175 out[0] = out0; | |
176 out[1] = out1; | |
177 out[2] = out2; | |
178 out[3] = out3; | |
179 | |
180 old_out0 = out0; | |
181 old_out1 = out1; | |
182 old_out2 = out2; | |
183 old_out3 = out3; | |
184 | |
185 out += 4; | |
186 in += 4; | |
187 } | |
188 | |
189 out -= n; | |
190 in -= n; | |
191 for (; n < buffer_length; n++) { | |
97 out[n] = in[n]; | 192 out[n] = in[n]; |
98 for (i = 1; i <= filter_length; i++) | 193 for (i = 1; i <= filter_length; i++) |
99 out[n] -= filter_coeffs[i-1] * out[n-i]; | 194 out[n] -= filter_coeffs[i-1] * out[n-i]; |
100 } | 195 } |
101 } | 196 } |